summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--COPYING361
-rw-r--r--Documentation/Makefile54
-rw-r--r--Documentation/cvs-migration.txt232
-rw-r--r--Documentation/diff-format.txt135
-rw-r--r--Documentation/diff-options.txt53
-rw-r--r--Documentation/diffcore.txt248
-rw-r--r--Documentation/git-apply-patch-script.txt32
-rw-r--r--Documentation/git-apply.txt76
-rw-r--r--Documentation/git-cat-file.txt61
-rw-r--r--Documentation/git-check-files.txt50
-rw-r--r--Documentation/git-checkout-cache.txt106
-rw-r--r--Documentation/git-clone-pack.txt65
-rw-r--r--Documentation/git-clone-script.txt60
-rw-r--r--Documentation/git-commit-tree.txt92
-rw-r--r--Documentation/git-convert-cache.txt30
-rw-r--r--Documentation/git-cvsimport-script.txt80
-rw-r--r--Documentation/git-diff-cache.txt134
-rw-r--r--Documentation/git-diff-files.txt44
-rw-r--r--Documentation/git-diff-helper.txt53
-rw-r--r--Documentation/git-diff-stages.txt41
-rw-r--r--Documentation/git-diff-tree.txt130
-rw-r--r--Documentation/git-export.txt31
-rw-r--r--Documentation/git-fetch-pack.txt73
-rw-r--r--Documentation/git-fetch-script.txt47
-rw-r--r--Documentation/git-fsck-cache.txt149
-rw-r--r--Documentation/git-hash-object.txt36
-rw-r--r--Documentation/git-http-pull.txt41
-rw-r--r--Documentation/git-init-db.txt40
-rw-r--r--Documentation/git-local-pull.txt43
-rw-r--r--Documentation/git-ls-files.txt196
-rw-r--r--Documentation/git-ls-tree.txt55
-rw-r--r--Documentation/git-merge-base.txt34
-rw-r--r--Documentation/git-merge-cache.txt84
-rw-r--r--Documentation/git-merge-one-file-script.txt30
-rw-r--r--Documentation/git-mktag.txt48
-rw-r--r--Documentation/git-pack-objects.txt80
-rw-r--r--Documentation/git-peek-remote.txt53
-rw-r--r--Documentation/git-prune-script.txt32
-rw-r--r--Documentation/git-pull-script.txt37
-rw-r--r--Documentation/git-read-tree.txt268
-rw-r--r--Documentation/git-receive-pack.txt41
-rw-r--r--Documentation/git-rename-script.txt34
-rw-r--r--Documentation/git-resolve-script.txt30
-rw-r--r--Documentation/git-rev-list.txt75
-rw-r--r--Documentation/git-rev-tree.txt88
-rw-r--r--Documentation/git-send-pack.txt46
-rw-r--r--Documentation/git-show-index.txt36
-rw-r--r--Documentation/git-ssh-pull.txt52
-rw-r--r--Documentation/git-ssh-push.txt48
-rw-r--r--Documentation/git-tag-script.txt38
-rw-r--r--Documentation/git-tar-tree.txt39
-rw-r--r--Documentation/git-unpack-file.txt37
-rw-r--r--Documentation/git-unpack-objects.txt33
-rw-r--r--Documentation/git-update-cache.txt126
-rw-r--r--Documentation/git-update-server-info.txt42
-rw-r--r--Documentation/git-upload-pack.txt40
-rw-r--r--Documentation/git-var.txt60
-rw-r--r--Documentation/git-verify-pack.txt49
-rw-r--r--Documentation/git-write-tree.txt44
-rw-r--r--Documentation/git.txt351
-rw-r--r--Documentation/pack-protocol.txt38
-rw-r--r--Documentation/pull-fetch-param.txt36
-rw-r--r--Documentation/tutorial.txt1111
-rw-r--r--INSTALL66
-rw-r--r--Makefile227
-rw-r--r--README482
-rw-r--r--apply.c1516
-rw-r--r--blob.c52
-rw-r--r--blob.h18
-rw-r--r--build-rev-cache.c56
-rw-r--r--cache.h314
-rw-r--r--cat-file.c55
-rw-r--r--check-files.c47
-rw-r--r--checkout-cache.c150
-rw-r--r--clone-pack.c182
-rw-r--r--commit-tree.c131
-rw-r--r--commit.c462
-rw-r--r--commit.h70
-rw-r--r--connect.c232
-rw-r--r--convert-cache.c326
-rw-r--r--count-delta.c79
-rw-r--r--count-delta.h10
-rw-r--r--csum-file.c147
-rw-r--r--csum-file.h19
-rw-r--r--daemon.c356
-rw-r--r--date.c459
-rw-r--r--debian/changelog21
-rw-r--r--debian/compat1
-rw-r--r--debian/control19
-rw-r--r--debian/copyright3
-rw-r--r--debian/docs3
-rw-r--r--debian/git-core.doc-base12
-rw-r--r--debian/git-core.install1
-rwxr-xr-xdebian/rules95
-rw-r--r--delta.h34
-rw-r--r--diff-cache.c294
-rw-r--r--diff-delta.c334
-rw-r--r--diff-files.c157
-rw-r--r--diff-helper.c154
-rw-r--r--diff-stages.c130
-rw-r--r--diff-tree.c559
-rw-r--r--diff.c1201
-rw-r--r--diff.h101
-rw-r--r--diffcore-break.c283
-rw-r--r--diffcore-order.c122
-rw-r--r--diffcore-pathspec.c65
-rw-r--r--diffcore-pickaxe.c104
-rw-r--r--diffcore-rename.c416
-rw-r--r--diffcore.h104
-rw-r--r--entry.c156
-rw-r--r--epoch.c639
-rw-r--r--epoch.h21
-rw-r--r--export.c81
-rw-r--r--fetch-pack.c144
-rw-r--r--fsck-cache.c534
-rw-r--r--get-tar-commit-id.c27
-rwxr-xr-xgit19
-rwxr-xr-xgit-add-script2
-rwxr-xr-xgit-apply-patch-script144
-rwxr-xr-xgit-branch-script17
-rwxr-xr-xgit-checkout-script75
-rwxr-xr-xgit-cherry86
-rwxr-xr-xgit-clone-dumb-http51
-rwxr-xr-xgit-clone-script117
-rwxr-xr-xgit-commit-script108
-rw-r--r--git-core.spec.in48
-rwxr-xr-xgit-cvsimport-script742
-rwxr-xr-xgit-diff-script19
-rwxr-xr-xgit-external-diff-script67
-rwxr-xr-xgit-fetch-script44
-rwxr-xr-xgit-format-patch-script188
-rwxr-xr-xgit-log-script5
-rwxr-xr-xgit-ls-remote-script104
-rwxr-xr-xgit-merge-one-file-script87
-rwxr-xr-xgit-parse-remote79
-rwxr-xr-xgit-prune-script25
-rwxr-xr-xgit-pull-script12
-rwxr-xr-xgit-push-script3
-rwxr-xr-xgit-rebase-script49
-rwxr-xr-xgit-relink-script173
-rwxr-xr-xgit-rename-script70
-rwxr-xr-xgit-repack-script14
-rwxr-xr-xgit-request-pull-script35
-rwxr-xr-xgit-reset-script5
-rwxr-xr-xgit-resolve-script64
-rwxr-xr-xgit-sh-setup-script17
-rwxr-xr-xgit-shortlog177
-rwxr-xr-xgit-status-script38
-rwxr-xr-xgit-tag-script72
-rwxr-xr-xgit-verify-tag-script8
-rwxr-xr-xgit-whatchanged7
-rw-r--r--gitenv.c75
-rw-r--r--hash-object.c45
-rw-r--r--http-pull.c189
-rw-r--r--ident.c148
-rw-r--r--index.c53
-rw-r--r--init-db.c93
-rw-r--r--local-pull.c151
-rw-r--r--ls-files.c431
-rw-r--r--ls-tree.c247
-rw-r--r--merge-base.c72
-rw-r--r--merge-cache.c128
-rw-r--r--mktag.c136
-rw-r--r--mozilla-sha1/sha1.c152
-rw-r--r--mozilla-sha1/sha1.h45
-rw-r--r--object.c152
-rw-r--r--object.h38
-rw-r--r--pack-check.c143
-rw-r--r--pack-objects.c471
-rw-r--r--pack.h32
-rw-r--r--patch-delta.c72
-rw-r--r--patch-id.c79
-rw-r--r--path.c60
-rw-r--r--peek-remote.c55
-rw-r--r--pkt-line.c117
-rw-r--r--pkt-line.h12
-rw-r--r--ppc/sha1.c72
-rw-r--r--ppc/sha1.h20
-rw-r--r--ppc/sha1ppc.S185
-rw-r--r--prune-packed.c66
-rw-r--r--pull.c178
-rw-r--r--pull.h41
-rw-r--r--quote.c41
-rw-r--r--quote.h26
-rw-r--r--read-cache.c497
-rw-r--r--read-tree.c603
-rw-r--r--receive-pack.c222
-rw-r--r--refs.c251
-rw-r--r--refs.h28
-rw-r--r--rev-cache.c320
-rw-r--r--rev-cache.h29
-rw-r--r--rev-list.c546
-rw-r--r--rev-parse.c412
-rw-r--r--rev-tree.c140
-rw-r--r--rsh.c68
-rw-r--r--rsh.h7
-rw-r--r--send-pack.c292
-rw-r--r--server-info.c572
-rw-r--r--sha1_file.c1380
-rw-r--r--show-index.c28
-rw-r--r--show-rev-cache.c18
-rw-r--r--ssh-pull.c97
-rw-r--r--ssh-push.c131
-rw-r--r--strbuf.c44
-rw-r--r--strbuf.h13
-rw-r--r--stripspace.c48
-rw-r--r--t/Makefile15
-rw-r--r--t/README208
-rwxr-xr-xt/diff-lib.sh35
-rwxr-xr-xt/lib-read-tree-m-3way.sh158
-rwxr-xr-xt/t0000-basic.sh179
-rwxr-xr-xt/t0100-environment-names.sh84
-rwxr-xr-xt/t0110-environment-names-old.sh132
-rwxr-xr-xt/t1000-read-tree-m-3way.sh517
-rwxr-xr-xt/t1001-read-tree-m-2way.sh344
-rwxr-xr-xt/t1002-read-tree-m-u-2way.sh324
-rwxr-xr-xt/t1005-read-tree-m-2way-emu23.sh422
-rwxr-xr-xt/t1100-commit-tree-options.sh45
-rwxr-xr-xt/t2000-checkout-cache-clash.sh53
-rwxr-xr-xt/t2001-checkout-cache-clash.sh87
-rwxr-xr-xt/t2002-checkout-cache-u.sh33
-rwxr-xr-xt/t2003-checkout-cache-mkdir.sh95
-rwxr-xr-xt/t2100-update-cache-badpath.sh51
-rwxr-xr-xt/t3000-ls-files-others.sh34
-rwxr-xr-xt/t3001-ls-files-others-exclude.sh55
-rwxr-xr-xt/t3010-ls-files-killed.sh61
-rwxr-xr-xt/t3100-ls-tree-restrict.sh131
-rwxr-xr-xt/t4000-diff-format.sh62
-rwxr-xr-xt/t4001-diff-rename.sh66
-rwxr-xr-xt/t4002-diff-basic.sh247
-rwxr-xr-xt/t4003-diff-rename-1.sh128
-rwxr-xr-xt/t4004-diff-rename-symlink.sh66
-rwxr-xr-xt/t4005-diff-rename-2.sh166
-rwxr-xr-xt/t4006-diff-mode.sh34
-rwxr-xr-xt/t4007-rename-3.sh90
-rwxr-xr-xt/t4008-diff-break-rewrite.sh188
-rwxr-xr-xt/t4009-diff-rename-4.sh175
-rwxr-xr-xt/t4010-diff-pathspec.sh65
-rwxr-xr-xt/t4100-apply-stat.sh47
-rw-r--r--t/t4100/t-apply-1.expect11
-rw-r--r--t/t4100/t-apply-1.patch194
-rw-r--r--t/t4100/t-apply-2.expect5
-rw-r--r--t/t4100/t-apply-2.patch72
-rw-r--r--t/t4100/t-apply-3.expect7
-rw-r--r--t/t4100/t-apply-3.patch567
-rw-r--r--t/t4100/t-apply-4.expect5
-rw-r--r--t/t4100/t-apply-4.patch7
-rw-r--r--t/t4100/t-apply-5.expect19
-rw-r--r--t/t4100/t-apply-5.patch612
-rw-r--r--t/t4100/t-apply-6.expect5
-rw-r--r--t/t4100/t-apply-6.patch101
-rw-r--r--t/t4100/t-apply-7.expect6
-rw-r--r--t/t4100/t-apply-7.patch494
-rwxr-xr-xt/t4101-apply-nonl.sh32
-rwxr-xr-xt/t5000-tar-tree.sh94
-rwxr-xr-xt/t5300-pack-object.sh168
-rwxr-xr-xt/t5400-send-pack.sh54
-rwxr-xr-xt/t6000lib.sh109
-rwxr-xr-xt/t6001-rev-list-merge-order.sh455
-rwxr-xr-xt/t6002-rev-list-bisect.sh241
-rwxr-xr-xt/t6003-rev-list-topo-order.sh408
-rwxr-xr-xt/test-lib.sh152
-rw-r--r--tag.c93
-rw-r--r--tag.h19
-rw-r--r--tar-tree.c439
-rw-r--r--test-date.c20
-rw-r--r--test-delta.c81
-rw-r--r--tools/Makefile26
-rwxr-xr-xtools/git-applymbox63
-rwxr-xr-xtools/git-applypatch64
-rw-r--r--tools/mailinfo.c268
-rw-r--r--tools/mailsplit.c144
-rw-r--r--tree.c214
-rw-r--r--tree.h34
-rw-r--r--unpack-file.c34
-rw-r--r--unpack-objects.c312
-rw-r--r--update-cache.c401
-rw-r--r--update-server-info.c23
-rw-r--r--upload-pack.c185
-rw-r--r--usage.c39
-rw-r--r--var.c65
-rw-r--r--verify-pack.c57
-rw-r--r--write-tree.c152
283 files changed, 40569 insertions, 0 deletions
diff --git a/COPYING b/COPYING
new file mode 100644
index 0000000..6ff87c4
--- /dev/null
+++ b/COPYING
@@ -0,0 +1,361 @@
+
+ Note that the only valid version of the GPL as far as this project
+ is concerned is _this_ particular version of the license (ie v2, not
+ v2.2 or v3.x or whatever), unless explicitly otherwise stated.
+
+ HOWEVER, in order to allow a migration to GPLv3 if that seems like
+ a good idea, I also ask that people involved with the project make
+ their preferences known. In particular, if you trust me to make that
+ decision, you might note so in your copyright message, ie something
+ like
+
+ This file is licensed under the GPL v2, or a later version
+ at the discretion of Linus.
+
+ might avoid issues. But we can also just decide to synchronize and
+ contact all copyright holders on record if/when the occasion arises.
+
+ Linus Torvalds
+
+----------------------------------------
+
+ GNU GENERAL PUBLIC LICENSE
+ Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+ 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users. This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it. (Some other Free Software Foundation software is covered by
+the GNU Library General Public License instead.) You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ GNU GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term "modification".) Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+ 1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+ 2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) You must cause the modified files to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ b) You must cause any work that you distribute or publish, that in
+ whole or in part contains or is derived from the Program or any
+ part thereof, to be licensed as a whole at no charge to all third
+ parties under the terms of this License.
+
+ c) If the modified program normally reads commands interactively
+ when run, you must cause it, when started running for such
+ interactive use in the most ordinary way, to print or display an
+ announcement including an appropriate copyright notice and a
+ notice that there is no warranty (or else, saying that you provide
+ a warranty) and that users may redistribute the program under
+ these conditions, and telling the user how to view a copy of this
+ License. (Exception: if the Program itself is interactive but
+ does not normally print such an announcement, your work based on
+ the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+ a) Accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of Sections
+ 1 and 2 above on a medium customarily used for software interchange; or,
+
+ b) Accompany it with a written offer, valid for at least three
+ years, to give any third party, for a charge no more than your
+ cost of physically performing source distribution, a complete
+ machine-readable copy of the corresponding source code, to be
+ distributed under the terms of Sections 1 and 2 above on a medium
+ customarily used for software interchange; or,
+
+ c) Accompany it with the information you received as to the offer
+ to distribute corresponding source code. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form with such
+ an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+ 5. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+ 6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+ 7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+ 10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+ Gnomovision version 69, Copyright (C) year name of author
+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+ `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+ <signature of Ty Coon>, 1 April 1989
+ Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs. If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library. If this is what you want to do, use the GNU Library General
+Public License instead of this License.
diff --git a/Documentation/Makefile b/Documentation/Makefile
new file mode 100644
index 0000000..4798933
--- /dev/null
+++ b/Documentation/Makefile
@@ -0,0 +1,54 @@
+MAN1_TXT=$(wildcard git-*.txt)
+MAN7_TXT=git.txt
+
+DOC_HTML=$(patsubst %.txt,%.html,$(MAN1_TXT) $(MAN7_TXT))
+
+DOC_MAN1=$(patsubst %.txt,%.1,$(MAN1_TXT))
+DOC_MAN7=$(patsubst %.txt,%.7,$(MAN7_TXT))
+
+prefix=$(HOME)
+bin=$(prefix)/bin
+mandir=$(prefix)/man
+man1=$(mandir)/man1
+man7=$(mandir)/man7
+
+INSTALL=install
+
+#
+# Please note that there is a minor bug in asciidoc.
+# The version after 6.0.3 _will_ include the patch found here:
+# http://marc.theaimsgroup.com/?l=git&m=111558757202243&w=2
+#
+# Until that version is released you may have to apply the patch
+# yourself - yes, all 6 characters of it!
+#
+
+all: html man
+
+html: $(DOC_HTML)
+
+
+man: man1 man7
+man1: $(DOC_MAN1)
+man7: $(DOC_MAN7)
+
+install:
+ $(INSTALL) -m755 -d $(dest)/$(man1) $(dest)/$(man7)
+ $(INSTALL) $(DOC_MAN1) $(dest)/$(man1)
+ $(INSTALL) $(DOC_MAN7) $(dest)/$(man7)
+
+# 'include' dependencies
+git-diff-%.txt: diff-format.txt diff-options.txt
+ touch $@
+
+clean:
+ rm -f *.xml *.html *.1 *.7
+
+%.html : %.txt
+ asciidoc -b xhtml11 -d manpage $<
+
+%.1 %.7 : %.xml
+ xmlto man $<
+
+%.xml : %.txt
+ asciidoc -b docbook -d manpage $<
diff --git a/Documentation/cvs-migration.txt b/Documentation/cvs-migration.txt
new file mode 100644
index 0000000..a4d4b7e
--- /dev/null
+++ b/Documentation/cvs-migration.txt
@@ -0,0 +1,232 @@
+Git for CVS users
+=================
+
+Ok, so you're a CVS user. That's ok, it's a treatable condition, and the
+first step to recovery is admitting you have a problem. The fact that
+you are reading this file means that you may be well on that path
+already.
+
+The thing about CVS is that it absolutely sucks as a source control
+manager, and you'll thus be happy with almost anything else. Git,
+however, may be a bit _too_ different (read: "good") for your taste, and
+does a lot of things differently.
+
+One particular suckage of CVS is very hard to work around: CVS is
+basically a tool for tracking _file_ history, while git is a tool for
+tracking _project_ history. This sometimes causes problems if you are
+used to doing very strange things in CVS, in particular if you're doing
+things like making branches of just a subset of the project. Git can't
+track that, since git never tracks things on the level of an individual
+file, only on the whole project level.
+
+The good news is that most people don't do that, and in fact most sane
+people think it's a bug in CVS that makes it tag (and check in changes)
+one file at a time. So most projects you'll ever see will use CVS
+_as_if_ it was sane. In which case you'll find it very easy indeed to
+move over to Git.
+
+First off: this is not a git tutorial. See Documentation/tutorial.txt
+for how git actually works. This is more of a random collection of
+gotcha's and notes on converting from CVS to git.
+
+Second: CVS has the notion of a "repository" as opposed to the thing
+that you're actually working in (your working directory, or your
+"checked out tree"). Git does not have that notion at all, and all git
+working directories _are_ the repositories. However, you can easily
+emulate the CVS model by having one special "global repository", which
+people can synchronize with. See details later, but in the meantime
+just keep in mind that with git, every checked out working tree will
+have a full revision control history of its own.
+
+
+Importing a CVS archive
+-----------------------
+
+Ok, you have an old project, and you want to at least give git a chance
+to see how it performs. The first thing you want to do (after you've
+gone through the git tutorial, and generally familiarized yourself with
+how to commit stuff etc in git) is to create a git'ified version of your
+CVS archive.
+
+Happily, that's very easy indeed. Git will do it for you, although git
+will need the help of a program called "cvsps":
+
+ http://www.cobite.com/cvsps/
+
+which is not actually related to git at all, but which makes CVS usage
+look almost sane (ie you almost certainly want to have it even if you
+decide to stay with CVS). However, git will want at _least_ version 2.1
+of cvsps (available at the address above), and in fact will currently
+refuse to work with anything else.
+
+Once you've gotten (and installed) cvsps, you may or may not want to get
+any more familiar with it, but make sure it is in your path. After that,
+the magic command line is
+
+ git cvsimport -v -d <cvsroot> -C <destination> <module>
+
+which will do exactly what you'd think it does: it will create a git
+archive of the named CVS module. The new archive will be created in the
+subdirectory named <destination>; it'll be created if it doesn't exist.
+Default is the local directory.
+
+It can take some time to actually do the conversion for a large archive
+since it involves checking out from CVS every revision of every file,
+and the conversion script is reasonably chatty unless you omit the '-v'
+option, but on some not very scientific tests it averaged about twenty
+revisions per second, so a medium-sized project should not take more
+than a couple of minutes. For larger projects or remote repositories,
+the process may take longer.
+
+After the (initial) import is done, the CVS archive's current head
+revision will be checked out -- thus, you can start adding your own
+changes right away.
+
+The import is incremental, i.e. if you call it again next month it'll
+fetch any CVS updates that have been happening in the meantime. The
+cut-off is date-based, so don't change the branches that were imported
+from CVS.
+
+You can merge those updates (or, in fact, a different CVS branch) into
+your main branch:
+
+ cg-merge <branch>
+
+The HEAD revision from CVS is named "origin", not "HEAD", because git
+already uses "HEAD". (If you don't like 'origin', use cvsimport's
+'-o' option to change it.)
+
+
+Emulating CVS behaviour
+-----------------------
+
+
+FIXME! Talk about setting up several repositories, and pulling and
+pushing between them. Talk about merging, and branches. Some of this
+needs to be in the tutorial too.
+
+
+
+CVS annotate
+------------
+
+So, something has gone wrong, and you don't know whom to blame, and
+you're an ex-CVS user and used to do "cvs annotate" to see who caused
+the breakage. You're looking for the "git annotate", and it's just
+claiming not to find such a script. You're annoyed.
+
+Yes, that's right. Core git doesn't do "annotate", although it's
+technically possible, and there are at least two specialized scripts out
+there that can be used to get equivalent information (see the git
+mailing list archives for details).
+
+Git has a couple of alternatives, though, that you may find sufficient
+or even superior depending on your use. One is called "git-whatchanged"
+(for obvious reasons) and the other one is called "pickaxe" ("a tool for
+the software archeologist").
+
+The "git-whatchanged" script is a truly trivial script that can give you
+a good overview of what has changed in a file or a directory (or an
+arbitrary list of files or directories). The "pickaxe" support is an
+additional layer that can be used to further specify exactly what you're
+looking for, if you already know the specific area that changed.
+
+Let's step back a bit and think about the reason why you would
+want to do "cvs annotate a-file.c" to begin with.
+
+You would use "cvs annotate" on a file when you have trouble
+with a function (or even a single "if" statement in a function)
+that happens to be defined in the file, which does not do what
+you want it to do. And you would want to find out why it was
+written that way, because you are about to modify it to suit
+your needs, and at the same time you do not want to break its
+current callers. For that, you are trying to find out why the
+original author did things that way in the original context.
+
+Many times, it may be enough to see the commit log messages of
+commits that touch the file in question, possibly along with the
+patches themselves, like this:
+
+ $ git-whatchanged -p a-file.c
+
+This will show log messages and patches for each commit that
+touches a-file.
+
+This, however, may not be very useful when this file has many
+modifications that are not related to the piece of code you are
+interested in. You would see many log messages and patches that
+do not have anything to do with the piece of code you are
+interested in. As an example, assuming that you have this piece
+of code that you are interested in in the HEAD version:
+
+ if (frotz) {
+ nitfol();
+ }
+
+you would use git-rev-list and git-diff-tree like this:
+
+ $ git-rev-list HEAD |
+ git-diff-tree --stdin -v -p -S'if (frotz) {
+ nitfol();
+ }'
+
+We have already talked about the "--stdin" form of git-diff-tree
+command that reads the list of commits and compares each commit
+with its parents. The git-whatchanged command internally runs
+the equivalent of the above command, and can be used like this:
+
+ $ git-whatchanged -p -S'if (frotz) {
+ nitfol();
+ }'
+
+When the -S option is used, git-diff-tree command outputs
+differences between two commits only if one tree has the
+specified string in a file and the corresponding file in the
+other tree does not. The above example looks for a commit that
+has the "if" statement in it in a file, but its parent commit
+does not have it in the same shape in the corresponding file (or
+the other way around, where the parent has it and the commit
+does not), and the differences between them are shown, along
+with the commit message (thanks to the -v flag). It does not
+show anything for commits that do not touch this "if" statement.
+
+Also, in the original context, the same statement might have
+appeared at first in a different file and later the file was
+renamed to "a-file.c". CVS annotate would not help you to go
+back across such a rename, but GIT would still help you in such
+a situation. For that, you can give the -C flag to
+git-diff-tree, like this:
+
+ $ git-whatchanged -p -C -S'if (frotz) {
+ nitfol();
+ }'
+
+When the -C flag is used, file renames and copies are followed.
+So if the "if" statement in question happens to be in "a-file.c"
+in the current HEAD commit, even if the file was originally
+called "o-file.c" and then renamed in an earlier commit, or if
+the file was created by copying an existing "o-file.c" in an
+earlier commit, you will not lose track. If the "if" statement
+did not change across such rename or copy, then the commit that
+does rename or copy would not show in the output, and if the
+"if" statement was modified while the file was still called
+"o-file.c", it would find the commit that changed the statement
+when it was in "o-file.c".
+
+[ BTW, the current version of "git-diff-tree -C" is not eager
+ enough to find copies, and it will miss the fact that a-file.c
+ was created by copying o-file.c unless o-file.c was somehow
+ changed in the same commit.]
+
+You can use the --pickaxe-all flag in addition to the -S flag.
+This shows the differences from all the files contained in
+those two commits, not just the differences between the files
+that contain this changed "if" statement:
+
+ $ git-whatchanged -p -C -S'if (frotz) {
+ nitfol();
+ }' --pickaxe-all
+
+[ Side note. This option is called "--pickaxe-all" because -S
+ option is internally called "pickaxe", a tool for software
+ archaeologists.]
diff --git a/Documentation/diff-format.txt b/Documentation/diff-format.txt
new file mode 100644
index 0000000..d6ce035
--- /dev/null
+++ b/Documentation/diff-format.txt
@@ -0,0 +1,135 @@
+The output format from "git-diff-cache", "git-diff-tree" and
+"git-diff-files" is very similar.
+
+These commands all compare two sets of things; what are
+compared are different:
+
+git-diff-cache <tree-ish>::
+ compares the <tree-ish> and the files on the filesystem.
+
+git-diff-cache --cached <tree-ish>::
+ compares the <tree-ish> and the cache.
+
+git-diff-tree [-r] <tree-ish-1> <tree-ish-2> [<pattern>...]::
+ compares the trees named by the two arguments.
+
+git-diff-files [<pattern>...]::
+ compares the cache and the files on the filesystem.
+
+
+An output line is formatted this way:
+
+in-place edit :100644 100644 bcd1234... 0123456... M file0
+copy-edit :100644 100644 abcd123... 1234567... C68 file1 file2
+rename-edit :100644 100644 abcd123... 1234567... R86 file1 file3
+create :000000 100644 0000000... 1234567... N file4
+delete :100644 000000 1234567... 0000000... D file5
+unmerged :000000 000000 0000000... 0000000... U file6
+
+That is, from the left to the right:
+
+ (1) a colon.
+ (2) mode for "src"; 000000 if creation or unmerged.
+ (3) a space.
+ (4) mode for "dst"; 000000 if deletion or unmerged.
+ (5) a space.
+ (6) sha1 for "src"; 0{40} if creation or unmerged.
+ (7) a space.
+ (8) sha1 for "dst"; 0{40} if creation, unmerged or "look at work tree".
+ (9) a space.
+ (10) status, followed by optional "score" number.
+ (11) a tab or a NUL when '-z' option is used.
+ (12) path for "src"
+ (13) a tab or a NUL when '-z' option is used; only exists for C or R.
+ (14) path for "dst"; only exists for C or R.
+ (15) an LF or a NUL when '-z' option is used, to terminate the record.
+
+<sha1> is shown as all 0's if new is a file on the filesystem
+and it is out of sync with the cache. Example:
+
+ :100644 100644 5be4a4...... 000000...... M file.c
+
+Generating patches with -p
+--------------------------
+
+When "git-diff-cache", "git-diff-tree", or "git-diff-files" are run
+with a '-p' option, they do not produce the output described above;
+instead they produce a patch file.
+
+The patch generation can be customized at two levels. This
+customization also applies to "git-diff-helper".
+
+1. When the environment variable 'GIT_EXTERNAL_DIFF' is not set,
+ these commands internally invoke "diff" like this:
+
+ diff -L a/<path> -L b/<path> -pu <old> <new>
++
+For added files, `/dev/null` is used for <old>. For removed
+files, `/dev/null` is used for <new>.
++
+The "diff" formatting options can be customized via the
+environment variable 'GIT_DIFF_OPTS'. For example, if you
+prefer context diff:
+
+ GIT_DIFF_OPTS=-c git-diff-cache -p $(cat .git/HEAD)
+
+
+2. When the environment variable 'GIT_EXTERNAL_DIFF' is set, the
+ program named by it is called, instead of the diff invocation
+ described above.
++
+For a path that is added, removed, or modified,
+'GIT_EXTERNAL_DIFF' is called with 7 parameters:
+
+ path old-file old-hex old-mode new-file new-hex new-mode
++
+where:
+
+ <old|new>-file:: are files GIT_EXTERNAL_DIFF can use to read the
+ contents of <old|new>,
+ <old|new>-hex:: are the 40-hexdigit SHA1 hashes,
+ <old|new>-mode:: are the octal representation of the file modes.
+
++
+The file parameters can point at the user's working file
+(e.g. `new-file` in "git-diff-files"), `/dev/null` (e.g. `old-file`
+when a new file is added), or a temporary file (e.g. `old-file` in the
+cache). 'GIT_EXTERNAL_DIFF' should not worry about unlinking the
+temporary file --- it is removed when 'GIT_EXTERNAL_DIFF' exits.
+
+For a path that is unmerged, 'GIT_EXTERNAL_DIFF' is called with 1
+parameter, <path>.
+
+
+Git specific extension to diff format
+-------------------------------------
+
+What -p option produces is slightly different from the
+traditional diff format.
+
+ (1) It is preceded with a "git diff" header, that looks like
+ this:
+
+ diff --git a/file1 b/file2
+
+ The a/ and b/ filenames are the same unless rename/copy is
+ involved. Especially, even for a creation or a deletion,
+ /dev/null is _not_ used in place of a/ or b/ filename.
+
+ When rename/copy is involved, file1 and file2 show the
+ name of the source file of the rename/copy and the name of
+ the file that rename/copy produces, respectively.
+
+ (2) It is followed by extended header lines that are one or
+ more of:
+
+ old mode <mode>
+ new mode <mode>
+ deleted file mode <mode>
+ new file mode <mode>
+ copy from <path>
+ copy to <path>
+ rename from <path>
+ rename to <path>
+ similarity index <number>
+ dissimilarity index <number>
diff --git a/Documentation/diff-options.txt b/Documentation/diff-options.txt
new file mode 100644
index 0000000..58b9c46
--- /dev/null
+++ b/Documentation/diff-options.txt
@@ -0,0 +1,53 @@
+-p::
+ Generate patch (see section on generating patches)
+
+-u::
+ Synonym for "-p".
+
+-r::
+ Look recursively in subdirectories; this flag does not
+ mean anything to commands other than "git-diff-tree";
+ other commands always look at all the subdirectories.
+
+-z::
+ \0 line termination on output
+
+--name-only::
+ Show only names of changed files.
+
+--name-only-z::
+ Same as --name-only, but terminate lines with NUL.
+
+-B::
+ Break complete rewrite changes into pairs of delete and create.
+
+-M::
+ Detect renames.
+
+-C::
+ Detect copies as well as renames.
+
+--find-copies-harder::
+ By default, -C option finds copies only if the original
+ file of the copy was modified in the same changeset for
+ performance reasons. This flag makes the command
+ inspect unmodified files as candidates for the source of
+ copy. This is a very expensive operation for large
+ projects, so use it with caution.
+
+-S<string>::
+ Look for differences that contain the change in <string>.
+
+--pickaxe-all::
+ When -S finds a change, show all the changes in that
+ changeset, not just the files that contain the change
+ in <string>.
+
+-O<orderfile>::
+ Output the patch in the order specified in the
+ <orderfile>, which has one shell glob pattern per line.
+
+-R::
+ Swap two inputs; that is, show differences from cache or
+ on-disk file to tree contents.
+
diff --git a/Documentation/diffcore.txt b/Documentation/diffcore.txt
new file mode 100644
index 0000000..6c474d1
--- /dev/null
+++ b/Documentation/diffcore.txt
@@ -0,0 +1,248 @@
+Tweaking diff output
+====================
+June 2005
+
+
+Introduction
+------------
+
+The diff commands git-diff-cache, git-diff-files, and
+git-diff-tree can be told to manipulate differences they find
+in unconventional ways before showing diff(1) output. The
+manipulation is collectively called "diffcore transformation".
+This short note describes what they are and how to use them to
+produce diff outputs that are easier to understand than the
+conventional kind.
+
+
+The chain of operation
+----------------------
+
+The git-diff-* family works by first comparing two sets of
+files:
+
+ - git-diff-cache compares contents of a "tree" object and the
+ working directory (when --cached flag is not used) or a
+ "tree" object and the index file (when --cached flag is
+ used);
+
+ - git-diff-files compares contents of the index file and the
+ working directory;
+
+ - git-diff-tree compares contents of two "tree" objects.
+
+In all of these cases, the commands themselves compare
+corresponding paths in the two sets of files. The result of
+comparison is passed from these commands to what is internally
+called "diffcore", in a format similar to what is output when
+the -p option is not used. E.g.
+
+ in-place edit :100644 100644 bcd1234... 0123456... M file0
+ create :000000 100644 0000000... 1234567... N file4
+ delete :100644 000000 1234567... 0000000... D file5
+ unmerged :000000 000000 0000000... 0000000... U file6
+
+The diffcore mechanism is fed a list of such comparison results
+(each of which is called "filepair", although at this point each
+of them talks about a single file), and transforms such a list
+into another list. There are currently 6 such transformations:
+
+ - diffcore-pathspec
+ - diffcore-break
+ - diffcore-rename
+ - diffcore-merge-broken
+ - diffcore-pickaxe
+ - diffcore-order
+
+These are applied in sequence. The set of filepairs git-diff-*
+commands find are used as the input to diffcore-pathspec, and
+the output from diffcore-pathspec is used as the input to the
+next transformation. The final result is then passed to the
+output routine and generates either diff-raw format (see Output
+format sections of the manual for git-diff-* commands) or
+diff-patch format.
+
+
+diffcore-pathspec
+-----------------
+
+The first transformation in the chain is diffcore-pathspec, and
+is controlled by giving the pathname parameters to the
+git-diff-* commands on the command line. The pathspec is used
+to limit the world diff operates in. It removes the filepairs
+outside the specified set of pathnames.
+
+Implementation note. For performance reasons, git-diff-tree
+uses the pathname parameters on the command line to cull set of
+filepairs it feeds the diffcore mechanism itself, and does not
+use diffcore-pathspec, but the end result is the same.
+
+
+diffcore-break
+--------------
+
+The second transformation in the chain is diffcore-break, and is
+controlled by the -B option to the git-diff-* commands. This is
+used to detect a filepair that represents "complete rewrite" and
+break such filepair into two filepairs that represent delete and
+create. E.g. If the input contained this filepair:
+
+ :100644 100644 bcd1234... 0123456... M file0
+
+and if it detects that the file "file0" is completely rewritten,
+it changes it to:
+
+ :100644 000000 bcd1234... 0000000... D file0
+ :000000 100644 0000000... 0123456... N file0
+
+For the purpose of breaking a filepair, diffcore-break examines
+the extent of changes between the contents of the files before
+and after modification (i.e. the contents that have "bcd1234..."
+and "0123456..." as their SHA1 content ID, in the above
+example). The amount of deletion of original contents and
+insertion of new material are added together, and if it exceeds
+the "break score", the filepair is broken into two. The break
+score defaults to 50% of the size of the smaller of the original
+and the result (i.e. if the edit shrinks the file, the size of
+the result is used; if the edit lengthens the file, the size of
+the original is used), and can be customized by giving a number
+after "-B" option (e.g. "-B75" to tell it to use 75%).
+
+
+diffcore-rename
+---------------
+
+This transformation is used to detect renames and copies, and is
+controlled by the -M option (to detect renames) and the -C option
+(to detect copies as well) to the git-diff-* commands. If the
+input contained these filepairs:
+
+ :100644 000000 0123456... 0000000... D fileX
+ :000000 100644 0000000... 0123456... N file0
+
+and the contents of the deleted file fileX are similar enough to
+the contents of the created file file0, then rename detection
+merges these filepairs and creates:
+
+ :100644 100644 0123456... 0123456... R100 fileX file0
+
+When the "-C" option is used, the original contents of modified
+files and contents of unchanged files are considered as
+candidates of the source files in rename/copy operation, in
+addition to the deleted files. If the input were like these
+filepairs, that talk about a modified file fileY and a newly
+created file file0:
+
+ :100644 100644 0123456... 1234567... M fileY
+ :000000 100644 0000000... 0123456... N file0
+
+the original contents of fileY and the resulting contents of
+file0 are compared, and if they are similar enough, they are
+changed to:
+
+ :100644 100644 0123456... 1234567... M fileY
+ :100644 100644 0123456... 0123456... C100 fileY file0
+
+In both rename and copy detection, the same "extent of changes"
+algorithm used in diffcore-break is used to determine if two
+files are "similar enough", and can be customized to use
+similarity score different from the default 50% by giving a
+number after "-M" or "-C" option (e.g. "-M8" to tell it to use
+8/10 = 80%).
+
+Note. When the "-C" option is used with --find-copies-harder
+option, git-diff-* commands feed unmodified filepairs to
+diffcore mechanism as well as modified ones. This lets the copy
+detector consider unmodified files as copy source candidates at
+the expense of making it slower. Without --find-copies-harder,
+git-diff-* commands can detect copies only if the file that was
+copied happened to have been modified in the same changeset.
+
+
+diffcore-merge-broken
+---------------------
+
+This transformation is used to merge filepairs broken by
+diffcore-break, and were not transformed into rename/copy by
+diffcore-rename, back into a single modification. This always
+runs when diffcore-break is used.
+
+For the purpose of merging broken filepairs back, it uses a
+different "extent of changes" computation from the ones used by
+diffcore-break and diffcore-rename. It counts only the deletion
+from the original, and does not count insertion. If you removed
+only 10 lines from a 100-line document, even if you added 910
+new lines to make a new 1000-line document, you did not do a
+complete rewrite. diffcore-break breaks such a case in order to
+help diffcore-rename to consider such filepairs as candidate of
+rename/copy detection, but if filepairs broken that way were not
+matched with other filepairs to create rename/copy, then this
+transformation merges them back into the original
+"modification".
+
+The "extent of changes" parameter can be tweaked from the
+default 80% (that is, unless more than 80% of the original
+material is deleted, the broken pairs are merged back into a
+single modification) by giving a second number to -B option,
+like these:
+
+ -B50/60 (give 50% "break score" to diffcore-break, use
+ 60% for diffcore-merge-broken).
+ -B/60 (the same as above, since diffcore-break defaults to
+ 50%).
+
+Note that an earlier implementation left a broken pair as separate
+creation and deletion patches. This was an unnecessary hack, and
+the latest implementation always merges all the broken pairs
+back into modifications, but the resulting patch output is
+formatted differently to still make reviewing easier for such
+a complete rewrite by showing the entire contents of old version
+prefixed with '-', followed by the entire contents of new
+version prefixed with '+'.
+
+
+diffcore-pickaxe
+----------------
+
+This transformation is used to find filepairs that represent
+changes that touch a specified string, and is controlled by the
+-S option and the --pickaxe-all option to the git-diff-*
+commands.
+
+When diffcore-pickaxe is in use, it checks if there are
+filepairs whose "original" side has the specified string and
+whose "result" side does not. Such a filepair represents "the
+string appeared in this changeset". It also checks for the
+opposite case that loses the specified string.
+
+When --pickaxe-all is not in effect, diffcore-pickaxe leaves
+only such filepairs that touch the specified string in its
+output. When --pickaxe-all is used, diffcore-pickaxe leaves all
+filepairs intact if there is such a filepair, or makes the
+output empty otherwise. The latter behaviour is designed to
+make reviewing of the changes in the context of the whole
+changeset easier.
+
+
+diffcore-order
+--------------
+
+This is used to reorder the filepairs according to the user's
+(or project's) taste, and is controlled by the -O option to the
+git-diff-* commands.
+
+This takes a text file each of whose lines is a shell glob
+pattern. Filepairs that match a glob pattern on an earlier line
+in the file are output before ones that match a later line, and
+filepairs that do not match any glob pattern are output last.
+
+As an example, typical orderfile for the core GIT probably
+should look like this:
+
+ README
+ Makefile
+ Documentation
+ *.h
+ *.c
+ t
+
diff --git a/Documentation/git-apply-patch-script.txt b/Documentation/git-apply-patch-script.txt
new file mode 100644
index 0000000..a6f860d
--- /dev/null
+++ b/Documentation/git-apply-patch-script.txt
@@ -0,0 +1,32 @@
+git-apply-patch-script(1)
+=========================
+v0.1, May 2005
+
+NAME
+----
+git-apply-patch-script - Sample script to apply the diffs from git-diff-*
+
+
+SYNOPSIS
+--------
+'git-apply-patch-script'
+
+DESCRIPTION
+-----------
+This is a sample script to be used via the 'GIT_EXTERNAL_DIFF'
+environment variable to apply the differences that the "git-diff-*"
+family of commands report to the current work tree.
+
+
+Author
+------
+Written by Linus Torvalds <torvalds@osdl.org>
+
+Documentation
+--------------
+Documentation by David Greaves, Junio C Hamano and the git-list <git@vger.kernel.org>.
+
+GIT
+---
+Part of the link:git.html[git] suite
+
diff --git a/Documentation/git-apply.txt b/Documentation/git-apply.txt
new file mode 100644
index 0000000..391d6f5
--- /dev/null
+++ b/Documentation/git-apply.txt
@@ -0,0 +1,76 @@
+git-apply(1)
+============
+v0.1, June 2005
+
+NAME
+----
+git-apply - Apply patch on a GIT index file and a work tree
+
+
+SYNOPSIS
+--------
+'git-apply' [--no-merge] [--stat] [--summary] [--check] [--index] [--show-files] [--apply] [<patch>...]
+
+DESCRIPTION
+-----------
+Reads supplied diff output and applies it on a GIT index file
+and a work tree.
+
+OPTIONS
+-------
+<patch>...::
+ The files to read patch from. '-' can be used to read
+ from the standard input.
+
+--no-merge::
+ The default mode of operation is the merge behaviour
+ which is not implemented yet. This flag explicitly
+ tells the program not to use the merge behaviour.
+
+--stat::
+ Instead of applying the patch, output diffstat for the
+ input. Turns off "apply".
+
+--summary::
+ Instead of applying the patch, output a condensed
+ summary of information obtained from git diff extended
+ headers, such as creations, renames and mode changes.
+ Turns off "apply".
+
+--check::
+ Instead of applying the patch, see if the patch is
+ applicable to the current work tree and/or the index
+ file and detect errors. Turns off "apply".
+
+--index::
+ When --check is in effect, or when applying the patch
+ (which is the default when none of the options that
+ disable it is in effect), make sure the patch is
+ applicable to what the current index file records. If
+ the file to be patched in the work tree is not
+ up-to-date, it is flagged as an error. This flag also
+ causes the index file to be updated.
+
+--show-files::
+ Show summary of files that are affected by the patch.
+
+--apply::
+ If you use any of the options marked ``Turns off
+ "apply"'' above, git-apply reads and outputs the
+ information you asked without actually applying the
+ patch. Give this flag after those flags to also apply
+ the patch.
+
+
+Author
+------
+Written by Linus Torvalds <torvalds@osdl.org>
+
+Documentation
+--------------
+Documentation by Junio C Hamano
+
+GIT
+---
+Part of the link:git.html[git] suite
+
diff --git a/Documentation/git-cat-file.txt b/Documentation/git-cat-file.txt
new file mode 100644
index 0000000..2131a29
--- /dev/null
+++ b/Documentation/git-cat-file.txt
@@ -0,0 +1,61 @@
+git-cat-file(1)
+===============
+v0.1, May 2005
+
+NAME
+----
+git-cat-file - Provide content or type information for repository objects
+
+
+SYNOPSIS
+--------
+'git-cat-file' (-t | -s | <type>) <object>
+
+DESCRIPTION
+-----------
+Provides content or type of objects in the repository. The type
+is required unless '-t' is used to find the object type,
+or '-s' is used to find the object size.
+
+OPTIONS
+-------
+<object>::
+ The sha1 identifier of the object.
+
+-t::
+ Instead of the content, show the object type identified by
+ <object>.
+
+-s::
+ Instead of the content, show the object size identified by
+ <object>.
+
+<type>::
+ Typically this matches the real type of <object> but asking
+ for a type that can trivially be dereferenced from the given
+ <object> is also permitted. An example is to ask for a
+ "tree" with <object> being a commit object that contains it,
+ or to ask for a "blob" with <object> being a tag object that
+ points at it.
+
+OUTPUT
+------
+If '-t' is specified, one of the <type>. If '-s' is specified,
+the size of the <object> in bytes.
+
+Otherwise the raw (though uncompressed) contents of the <object> will
+be returned.
+
+
+Author
+------
+Written by Linus Torvalds <torvalds@osdl.org>
+
+Documentation
+--------------
+Documentation by David Greaves, Junio C Hamano and the git-list <git@vger.kernel.org>.
+
+GIT
+---
+Part of the link:git.html[git] suite
+
diff --git a/Documentation/git-check-files.txt b/Documentation/git-check-files.txt
new file mode 100644
index 0000000..6146098
--- /dev/null
+++ b/Documentation/git-check-files.txt
@@ -0,0 +1,50 @@
+git-check-files(1)
+==================
+v0.1, May 2005
+
+NAME
+----
+git-check-files - Verify a list of files are up-to-date
+
+
+
+SYNOPSIS
+--------
+'git-check-files' <file>...
+
+DESCRIPTION
+-----------
+Check that a list of files are up-to-date between the filesystem and
+the cache. Used to verify a patch target before doing a patch.
+
+Files that do not exist on the filesystem are considered up-to-date
+(whether or not they are in the cache).
+
+Emits an error message on failure:
+
+preparing to update existing file <file> not in cache::
+ <file> exists but is not in the cache
+
+preparing to update file <file> not uptodate in cache::
+ <file> on disk is not up-to-date with the cache
+
+Exits with a status code indicating success if all files are
+up-to-date.
+
+See Also
+--------
+link:git-update-cache.html[git-update-cache]
+
+
+Author
+------
+Written by Linus Torvalds <torvalds@osdl.org>
+
+Documentation
+--------------
+Documentation by David Greaves, Junio C Hamano and the git-list <git@vger.kernel.org>.
+
+GIT
+---
+Part of the link:git.html[git] suite
+
diff --git a/Documentation/git-checkout-cache.txt b/Documentation/git-checkout-cache.txt
new file mode 100644
index 0000000..321a00c
--- /dev/null
+++ b/Documentation/git-checkout-cache.txt
@@ -0,0 +1,106 @@
+git-checkout-cache(1)
+=====================
+v0.1, May 2005
+
+NAME
+----
+git-checkout-cache - Copy files from the cache to the working directory
+
+
+SYNOPSIS
+--------
+'git-checkout-cache' [-u] [-q] [-a] [-f] [-n] [--prefix=<string>]
+ [--] <file>...
+
+DESCRIPTION
+-----------
+Will copy all files listed from the cache to the working directory
+(not overwriting existing files).
+
+OPTIONS
+-------
+-u::
+ update stat information for the checked out entries in
+ the cache file.
+
+-q::
+ be quiet if files exist or are not in the cache
+
+-f::
+ forces overwrite of existing files
+
+-a::
+ checks out all files in the cache (will then continue to
+ process listed files).
+
+-n::
+ Don't checkout new files, only refresh files already checked
+ out.
+
+--prefix=<string>::
+ When creating files, prepend <string> (usually a directory
+ including a trailing /)
+
+--::
+ Do not interpret any more arguments as options.
+
+Note that the order of the flags matters:
+
+ git-checkout-cache -a -f file.c
+
+will first check out all files listed in the cache (but not overwrite
+any old ones), and then force-checkout `file.c` a second time (ie that
+one *will* overwrite any old contents with the same filename).
+
+Also, just doing "git-checkout-cache" does nothing. You probably meant
+"git-checkout-cache -a". And if you want to force it, you want
+"git-checkout-cache -f -a".
+
+Intuitiveness is not the goal here. Repeatability is. The reason for
+the "no arguments means no work" thing is that from scripts you are
+supposed to be able to do things like:
+
+ find . -name '*.h' -print0 | xargs -0 git-checkout-cache -f --
+
+which will force all existing `*.h` files to be replaced with their
+cached copies. If an empty command line implied "all", then this would
+force-refresh everything in the cache, which was not the point.
+
+To update and refresh only the files already checked out:
+
+ git-checkout-cache -n -f -a && git-update-cache --ignore-missing --refresh
+
+Oh, and the "--" is just a good idea when you know the rest will be
+filenames. Just so that you wouldn't have a filename of "-a" causing
+problems (not possible in the above example, but get used to it in
+scripting!).
+
+The prefix ability basically makes it trivial to use
+git-checkout-cache as an "export as tree" function. Just read the
+desired tree into the index, and do a
+
+ git-checkout-cache --prefix=git-export-dir/ -a
+
+and git-checkout-cache will "export" the cache into the specified
+directory.
+
+NOTE The final "/" is important. The exported name is literally just
+prefixed with the specified string, so you can also do something like
+
+ git-checkout-cache --prefix=.merged- Makefile
+
+to check out the currently cached copy of `Makefile` into the file
+`.merged-Makefile`
+
+Author
+------
+Written by Linus Torvalds <torvalds@osdl.org>
+
+Documentation
+--------------
+Documentation by David Greaves, Junio C Hamano and the git-list <git@vger.kernel.org>.
+
+GIT
+---
+Part of the link:git.html[git] suite
+
diff --git a/Documentation/git-clone-pack.txt b/Documentation/git-clone-pack.txt
new file mode 100644
index 0000000..0dc89a9
--- /dev/null
+++ b/Documentation/git-clone-pack.txt
@@ -0,0 +1,65 @@
+git-clone-pack(1)
+=================
+v0.1, July 2005
+
+NAME
+----
+git-clone-pack - Clones a repository by receiving packed objects.
+
+
+SYNOPSIS
+--------
+'git-clone-pack' [-q] [--exec=<git-upload-pack>] [<host>:]<directory> [<head>...]
+
+DESCRIPTION
+-----------
+Clones a repository into the current repository by invoking
+'git-upload-pack', possibly on the remote host via ssh, in
+the named repository, and invoking 'git-unpack-objects' locally
+to receive the pack.
+
+OPTIONS
+-------
+-q::
+ Pass '-q' flag to 'git-unpack-objects'; this makes the
+ cloning process less verbose.
+
+--exec=<git-upload-pack>::
+ Use this to specify the path to 'git-upload-pack' on the
+ remote side, if it is not found on your $PATH.
+ Installations of sshd ignore the user's environment
+ setup scripts for login shells (e.g. .bash_profile) and
+ your privately installed GIT may not be found on the system
+ default $PATH. Another workaround suggested is to set
+ up your $PATH in ".bashrc", but this flag is for people
+ who do not want to pay the overhead for non-interactive
+ shells by having a lean .bashrc file (they set most of
+ the things up in .bash_profile).
+
+<host>::
+ A remote host that houses the repository. When this
+ part is specified, 'git-upload-pack' is invoked via
+ ssh.
+
+<directory>::
+ The repository to sync from.
+
+<head>...::
+ The heads to update. This is relative to $GIT_DIR
+ (e.g. "HEAD", "refs/heads/master"). When unspecified,
+ all heads are updated to match the remote repository.
+
+
+Author
+------
+Written by Linus Torvalds <torvalds@osdl.org>
+
+Documentation
+--------------
+Documentation by Junio C Hamano.
+
+
+GIT
+---
+Part of the link:git.html[git] suite
+
diff --git a/Documentation/git-clone-script.txt b/Documentation/git-clone-script.txt
new file mode 100644
index 0000000..afb25bd
--- /dev/null
+++ b/Documentation/git-clone-script.txt
@@ -0,0 +1,60 @@
+git-clone-script(1)
+===================
+v0.1, July 2005
+
+NAME
+----
+git-clone-script - Clones a repository.
+
+
+SYNOPSIS
+--------
+'git clone' [-l] [-u <upload-pack>] [-q] <repository> <directory>
+
+DESCRIPTION
+-----------
+Clones a repository into a newly created directory.
+
+OPTIONS
+-------
+-l::
+ When the repository to clone from is on a local machine,
+ this flag bypasses normal "git aware" transport
+ mechanism and clones the repository by making a copy of
+ HEAD and everything under objects and refs directories.
+ The files under .git/objects/ directory are hardlinked
+ to save space when possible.
+
+-q::
+ Operate quietly. This flag is passed to "rsync" and
+ "git-clone-pack" commands when given.
+
+-u <upload-pack>::
+ When given, and the repository to clone from is handled
+ by 'git-clone-pack', '--exec=<upload-pack>' is passed to
+ the command to specify non-default path for the command
+ run on the other end.
+
+<repository>::
+ The (possibly remote) repository to clone from. It can
+ be an "rsync://host/dir" URL, an "http://host/dir" URL,
+ or [<host>:]/dir notation that is used by 'git-clone-pack'.
+ Currently http transport is not supported.
+
+<directory>::
+ The name of a new directory to be cloned into. It is an
+ error to specify an existing directory.
+
+Author
+------
+Written by Linus Torvalds <torvalds@osdl.org>
+
+Documentation
+--------------
+Documentation by Junio C Hamano.
+
+
+GIT
+---
+Part of the link:git.html[git] suite
+
diff --git a/Documentation/git-commit-tree.txt b/Documentation/git-commit-tree.txt
new file mode 100644
index 0000000..5536668
--- /dev/null
+++ b/Documentation/git-commit-tree.txt
@@ -0,0 +1,92 @@
+git-commit-tree(1)
+==================
+v0.1, May 2005
+
+NAME
+----
+git-commit-tree - Creates a new commit object
+
+
+SYNOPSIS
+--------
+'git-commit-tree' <tree> [-p <parent commit>]\ < changelog
+
+DESCRIPTION
+-----------
+Creates a new commit object based on the provided tree object and
+emits the new commit object id on stdout. If no parent is given then
+it is considered to be an initial tree.
+
+A commit object usually has 1 parent (a commit after a change) or up
+to 16 parents. More than one parent represents a merge of branches
+that led to them.
+
+While a tree represents a particular directory state of a working
+directory, a commit represents that state in "time", and explains how
+to get there.
+
+Normally a commit would identify a new "HEAD" state, and while git
+doesn't care where you save the note about that state, in practice we
+tend to just write the result to the file `.git/HEAD`, so that we can
+always see what the last committed state was.
+
+OPTIONS
+-------
+<tree>::
+ An existing tree object
+
+-p <parent commit>::
+ Each '-p' indicates the id of a parent commit object.
+
+
+Commit Information
+------------------
+
+A commit encapsulates:
+
+- all parent object ids
+- author name, email and date
+- committer name and email and the commit time.
+
+If not provided, "git-commit-tree" uses your name, hostname and domain to
+provide author and committer info. This can be overridden using the
+following environment variables.
+
+ GIT_AUTHOR_NAME
+ GIT_AUTHOR_EMAIL
+ GIT_AUTHOR_DATE
+ GIT_COMMITTER_NAME
+ GIT_COMMITTER_EMAIL
+
+(nb <,> and '\n's are stripped)
+
+A commit comment is read from stdin (max 999 chars). If a changelog
+entry is not provided via '<' redirection, "git-commit-tree" will just wait
+for one to be entered and terminated with ^D
+
+Diagnostics
+-----------
+You don't exist. Go away!::
+ The passwd(5) gecos field couldn't be read
+Your parents must have hated you!::
+ The passwd(5) gecos field is longer than a giant static buffer.
+Your sysadmin must hate you!::
+ The passwd(5) name field is longer than a giant static buffer.
+
+See Also
+--------
+link:git-write-tree.html[git-write-tree]
+
+
+Author
+------
+Written by Linus Torvalds <torvalds@osdl.org>
+
+Documentation
+--------------
+Documentation by David Greaves, Junio C Hamano and the git-list <git@vger.kernel.org>.
+
+GIT
+---
+Part of the link:git.html[git] suite
+
diff --git a/Documentation/git-convert-cache.txt b/Documentation/git-convert-cache.txt
new file mode 100644
index 0000000..66d7fe7
--- /dev/null
+++ b/Documentation/git-convert-cache.txt
@@ -0,0 +1,30 @@
+git-convert-cache(1)
+====================
+v0.1, May 2005
+
+NAME
+----
+git-convert-cache - Converts old-style GIT repository
+
+
+SYNOPSIS
+--------
+'git-convert-cache'
+
+DESCRIPTION
+-----------
+Converts old-style GIT repository to the latest format
+
+
+Author
+------
+Written by Linus Torvalds <torvalds@osdl.org>
+
+Documentation
+--------------
+Documentation by David Greaves, Junio C Hamano and the git-list <git@vger.kernel.org>.
+
+GIT
+---
+Part of the link:git.html[git] suite
+
diff --git a/Documentation/git-cvsimport-script.txt b/Documentation/git-cvsimport-script.txt
new file mode 100644
index 0000000..05145e9
--- /dev/null
+++ b/Documentation/git-cvsimport-script.txt
@@ -0,0 +1,80 @@
+git-cvsimport-script(1)
+=======================
+v0.1, July 2005
+
+NAME
+----
+git-cvsimport-script - Import a CVS repository into git
+
+
+SYNOPSIS
+--------
+'git-cvsimport-script' [ -o <branch-for-HEAD> ] [ -h ] [ -v ]
+ [ -d <CVSROOT> ] [ -p <options-for-cvsps> ]
+ [ -C <GIT_repository> ] [ -i ] [ <CVS_module> ]
+
+
+DESCRIPTION
+-----------
+Imports a CVS repository into git. It will either create a new
+repository, or incrementally import into an existing one.
+
+Splitting the CVS log into patch sets is done by 'cvsps'.
+At least version 2.1 is required.
+
+OPTIONS
+-------
+-d <CVSROOT>::
+ The root of the CVS archive. May be local (a simple path) or remote;
+ currently, only the :local:, :ext: and :pserver: access methods
+ are supported.
+
+-i::
+ Import-only: don't perform a checkout after importing. This option
+ ensures the working directory and cache remain untouched and will
+ not create them if they do not exist.
+
+-o <branch-for-HEAD>::
+ The 'HEAD' branch from CVS is imported to the 'origin' branch within
+ the git repository, as 'HEAD' already has a special meaning for git.
+ Use this option if you want to import into a different branch.
+
+ Use '-o master' for continuing an import that was initially done by
+ the old cvs2git tool.
+
+-p <options-for-cvsps>::
+ Additional options for cvsps.
+ The options '-x' and '-A' are implicit and should not be used here.
+
+ If you need to pass multiple options, separate them with a comma.
+
+-v::
+ Verbosity: let 'cvsimport' report what it is doing.
+
+<CVS_module>::
+ The CVS module you want to import. Relative to <CVSROOT>.
+
+-h::
+ Print a short usage message and exit.
+
+OUTPUT
+------
+If '-v' is specified, the script reports what it is doing.
+
+Otherwise, success is indicated the Unix way, i.e. by simply exiting with
+a zero exit status.
+
+
+Author
+------
+Written by Matthias Urlichs <smurf@smurf.noris.de>, with help from
+various participants of the git-list <git@vger.kernel.org>.
+
+Documentation
+--------------
+Documentation by Matthias Urlichs <smurf@smurf.noris.de>.
+
+GIT
+---
+Part of the link:git.html[git] suite
+
diff --git a/Documentation/git-diff-cache.txt b/Documentation/git-diff-cache.txt
new file mode 100644
index 0000000..b463593
--- /dev/null
+++ b/Documentation/git-diff-cache.txt
@@ -0,0 +1,134 @@
+git-diff-cache(1)
+=================
+v0.1, May 2005
+
+NAME
+----
+git-diff-cache - Compares content and mode of blobs between the cache and repository
+
+
+SYNOPSIS
+--------
+'git-diff-cache' [-m] [--cached] [<common diff options>] <tree-ish> [<path>...]
+
+DESCRIPTION
+-----------
+Compares the content and mode of the blobs found via a tree
+object with the content of the current cache and, optionally
+ignoring the stat state of the file on disk. When paths are
+specified, compares only those named paths. Otherwise all
+entries in the cache are compared.
+
+OPTIONS
+-------
+include::diff-options.txt[]
+
+<tree-ish>::
+ The id of a tree object to diff against.
+
+--cached::
+ do not consider the on-disk file at all
+
+-m::
+ By default, files recorded in the index but not checked
+ out are reported as deleted. This flag makes
+ "git-diff-cache" say that all non-checked-out files are up
+ to date.
+
+Output format
+-------------
+include::diff-format.txt[]
+
+Operating Modes
+---------------
+You can choose whether you want to trust the index file entirely
+(using the '--cached' flag) or ask the diff logic to show any files
+that don't match the stat state as being "tentatively changed". Both
+of these operations are very useful indeed.
+
+Cached Mode
+-----------
+If '--cached' is specified, it allows you to ask:
+
+ show me the differences between HEAD and the current index
+ contents (the ones I'd write with a "git-write-tree")
+
+For example, let's say that you have worked on your index file, and are
+ready to commit. You want to see exactly *what* you are going to commit
+without having to write a new tree object and compare it that way, and to
+do that, you just do
+
+ git-diff-cache --cached $(cat .git/HEAD)
+
+Example: let's say I had renamed `commit.c` to `git-commit.c`, and I had
+done a "git-update-cache" to make that effective in the index file.
+"git-diff-files" wouldn't show anything at all, since the index file
+matches my working directory. But doing a "git-diff-cache" does:
+
+ torvalds@ppc970:~/git> git-diff-cache --cached $(cat .git/HEAD)
+ -100644 blob 4161aecc6700a2eb579e842af0b7f22b98443f74 commit.c
+ +100644 blob 4161aecc6700a2eb579e842af0b7f22b98443f74 git-commit.c
+
+You can trivially see that the above is a rename.
+
+In fact, "git-diff-cache --cached" *should* always be entirely equivalent to
+actually doing a "git-write-tree" and comparing that. Except this one is much
+nicer for the case where you just want to check where you are.
+
+So doing a "git-diff-cache --cached" is basically very useful when you are
+asking yourself "what have I already marked for being committed, and
+what's the difference to a previous tree".
+
+Non-cached Mode
+---------------
+The "non-cached" mode takes a different approach, and is potentially
+the more useful of the two in that what it does can't be emulated with
+a "git-write-tree" + "git-diff-tree". Thus that's the default mode.
+The non-cached version asks the question:
+
+ show me the differences between HEAD and the currently checked out
+ tree - index contents _and_ files that aren't up-to-date
+
+which is obviously a very useful question too, since that tells you what
+you *could* commit. Again, the output matches the "git-diff-tree -r"
+output to a tee, but with a twist.
+
+The twist is that if some file doesn't match the cache, we don't have
+a backing store thing for it, and we use the magic "all-zero" sha1 to
+show that. So let's say that you have edited `kernel/sched.c`, but
+have not actually done a "git-update-cache" on it yet - there is no
+"object" associated with the new state, and you get:
+
+ torvalds@ppc970:~/v2.6/linux> git-diff-cache $(cat .git/HEAD )
+ *100644->100664 blob 7476bb......->000000...... kernel/sched.c
+
+ie it shows that the tree has changed, and that `kernel/sched.c` is
+not up-to-date and may contain new stuff. The all-zero sha1 means that to
+get the real diff, you need to look at the object in the working directory
+directly rather than do an object-to-object diff.
+
+NOTE! As with other commands of this type, "git-diff-cache" does not
+actually look at the contents of the file at all. So maybe
+`kernel/sched.c` hasn't actually changed, and it's just that you
+touched it. In either case, it's a note that you need to
+"git-update-cache" it to make the cache be in sync.
+
+NOTE 2! You can have a mixture of files show up as "has been updated"
+and "is still dirty in the working directory" together. You can always
+tell which file is in which state, since the "has been updated" ones
+show a valid sha1, and the "not in sync with the index" ones will
+always have the special all-zero sha1.
+
+
+Author
+------
+Written by Linus Torvalds <torvalds@osdl.org>
+
+Documentation
+--------------
+Documentation by David Greaves, Junio C Hamano and the git-list <git@vger.kernel.org>.
+
+GIT
+---
+Part of the link:git.html[git] suite
+
diff --git a/Documentation/git-diff-files.txt b/Documentation/git-diff-files.txt
new file mode 100644
index 0000000..3e1244f
--- /dev/null
+++ b/Documentation/git-diff-files.txt
@@ -0,0 +1,44 @@
+git-diff-files(1)
+=================
+v0.1, May 2005
+
+NAME
+----
+git-diff-files - Compares files in the working tree and the cache
+
+
+SYNOPSIS
+--------
+'git-diff-files' [-q] [<common diff options>] [<path>...]
+
+DESCRIPTION
+-----------
+Compares the files in the working tree and the cache. When paths
+are specified, compares only those named paths. Otherwise all
+entries in the cache are compared. The output format is the
+same as "git-diff-cache" and "git-diff-tree".
+
+OPTIONS
+-------
+include::diff-options.txt[]
+
+-q::
+ Remain silent even on nonexisting files
+
+Output format
+-------------
+include::diff-format.txt[]
+
+
+Author
+------
+Written by Linus Torvalds <torvalds@osdl.org>
+
+Documentation
+--------------
+Documentation by David Greaves, Junio C Hamano and the git-list <git@vger.kernel.org>.
+
+GIT
+---
+Part of the link:git.html[git] suite
+
diff --git a/Documentation/git-diff-helper.txt b/Documentation/git-diff-helper.txt
new file mode 100644
index 0000000..d826deb
--- /dev/null
+++ b/Documentation/git-diff-helper.txt
@@ -0,0 +1,53 @@
+git-diff-helper(1)
+==================
+v0.1, May 2005
+
+NAME
+----
+git-diff-helper - Generates patch format output for git-diff-*
+
+
+SYNOPSIS
+--------
+'git-diff-helper' [-z] [-S<string>] [-O<orderfile>]
+
+DESCRIPTION
+-----------
+Reads output from "git-diff-cache", "git-diff-tree" and "git-diff-files" and
+generates patch format output.
+
+OPTIONS
+-------
+-z::
+ \0 line termination on input
+
+-S<string>::
+ Look for differences that contain the change in <string>.
+
+--pickaxe-all::
+ When -S finds a change, show all the changes in that
+ changeset, not just the files that contain the change
+ in <string>.
+
+-O<orderfile>::
+ Output the patch in the order specified in the
+ <orderfile>, which has one shell glob pattern per line.
+
+See Also
+--------
+The section on generating patches in link:git-diff-cache.html[git-diff-cache]
+
+
+Author
+------
+Written by Junio C Hamano <junkio@cox.net>
+
+
+Documentation
+--------------
+Documentation by David Greaves, Junio C Hamano and the git-list <git@vger.kernel.org>.
+
+GIT
+---
+Part of the link:git.html[git] suite
+
diff --git a/Documentation/git-diff-stages.txt b/Documentation/git-diff-stages.txt
new file mode 100644
index 0000000..2084c70
--- /dev/null
+++ b/Documentation/git-diff-stages.txt
@@ -0,0 +1,41 @@
+git-diff-stages(1)
+==================
+v0.1, June 2005
+
+NAME
+----
+git-diff-stages - Compares content and mode of blobs between stages in an unmerged index file.
+
+
+SYNOPSIS
+--------
+'git-diff-stages' [<common diff options>] <stage1> <stage2> [<path>...]
+
+DESCRIPTION
+-----------
+Compares the content and mode of the blobs in two stages in an
+unmerged index file.
+
+OPTIONS
+-------
+include::diff-options.txt[]
+
+<stage1>,<stage2>::
+ The stage number to be compared.
+
+Output format
+-------------
+include::diff-format.txt[]
+
+
+Author
+------
+Written by Junio C Hamano <junkio@cox.net>
+
+Documentation
+--------------
+Documentation by Junio C Hamano.
+
+GIT
+---
+Part of the link:git.html[git] suite
diff --git a/Documentation/git-diff-tree.txt b/Documentation/git-diff-tree.txt
new file mode 100644
index 0000000..b2095cb
--- /dev/null
+++ b/Documentation/git-diff-tree.txt
@@ -0,0 +1,130 @@
+git-diff-tree(1)
+================
+v0.1, May 2005
+
+NAME
+----
+git-diff-tree - Compares the content and mode of blobs found via two tree objects
+
+
+SYNOPSIS
+--------
+'git-diff-tree' [--stdin] [-m] [-s] [-v] [--pretty] [-t] [<common diff options>] <tree-ish> <tree-ish> [<path>...]
+
+DESCRIPTION
+-----------
+Compares the content and mode of the blobs found via two tree objects.
+
+Note that "git-diff-tree" can use the tree encapsulated in a commit object.
+
+OPTIONS
+-------
+include::diff-options.txt[]
+
+<tree-ish>::
+ The id of a tree object.
+
+<path>...::
+ If provided, the results are limited to a subset of files
+ matching one of these prefix strings.
+ ie file matches `/^<pattern1>|<pattern2>|.../`
+ Note that this parameter does not provide any wildcard or regexp
+ features.
+
+-t::
+ show tree entry itself as well as subtrees. Implies -r.
+
+--root::
+ When '--root' is specified the initial commit will be shown as a big
+ creation event. This is equivalent to a diff against the NULL tree.
+
+--stdin::
+ When '--stdin' is specified, the command does not take
+ <tree-ish> arguments from the command line. Instead, it
+ reads either one <commit> or a pair of <tree-ish>
+ separated with a single space from its standard input.
++
+When a single commit is given on one line of such input, it compares
+the commit with its parents. The following flags further affect its
+behaviour. This does not apply to the case where two <tree-ish>
+separated with a single space are given.
+
+-m::
+ By default, "git-diff-tree --stdin" does not show
+ differences for merge commits. With this flag, it shows
+ differences to that commit from all of its parents.
+
+-s::
+ By default, "git-diff-tree --stdin" shows differences,
+ either in machine-readable form (without '-p') or in patch
+ form (with '-p'). This output can be suppressed. It is
+ only useful with '-v' flag.
+
+-v::
+ This flag causes "git-diff-tree --stdin" to also show
+ the commit message before the differences.
+
+--pretty[=(raw|medium|short)]::
+ This is used to control "pretty printing" format of the
+ commit message. Without "=<style>", it defaults to
+ medium.
+
+
+Limiting Output
+---------------
+If you're only interested in differences in a subset of files, for
+example some architecture-specific files, you might do:
+
+ git-diff-tree -r <tree-ish> <tree-ish> arch/ia64 include/asm-ia64
+
+and it will only show you what changed in those two directories.
+
+Or if you are searching for what changed in just `kernel/sched.c`, just do
+
+ git-diff-tree -r <tree-ish> <tree-ish> kernel/sched.c
+
+and it will ignore all differences to other files.
+
+The pattern is always the prefix, and is matched exactly. There are no
+wildcards. Even stricter, it has to match a complete path component.
+I.e. "foo" does not pick up `foobar.h`. "foo" does match `foo/bar.h`
+so it can be used to name subdirectories.
+
+An example of normal usage is:
+
+ torvalds@ppc970:~/git> git-diff-tree 5319e4......
+ *100664->100664 blob ac348b.......->a01513....... git-fsck-cache.c
+
+which tells you that the last commit changed just one file (it's from
+this one:
+
+ commit 3c6f7ca19ad4043e9e72fa94106f352897e651a8
+ tree 5319e4d609cdd282069cc4dce33c1db559539b03
+ parent b4e628ea30d5ab3606119d2ea5caeab141d38df7
+ author Linus Torvalds <torvalds@ppc970.osdl.org> Sat Apr 9 12:02:30 2005
+ committer Linus Torvalds <torvalds@ppc970.osdl.org> Sat Apr 9 12:02:30 2005
+
+ Make "git-fsck-cache" print out all the root commits it finds.
+
+ Once I do the reference tracking, I'll also make it print out all the
+ HEAD commits it finds, which is even more interesting.
+
+in case you care).
+
+Output format
+-------------
+include::diff-format.txt[]
+
+
+Author
+------
+Written by Linus Torvalds <torvalds@osdl.org>
+
+Documentation
+--------------
+Documentation by David Greaves, Junio C Hamano and the git-list <git@vger.kernel.org>.
+
+GIT
+---
+Part of the link:git.html[git] suite
+
diff --git a/Documentation/git-export.txt b/Documentation/git-export.txt
new file mode 100644
index 0000000..d2d0dc4
--- /dev/null
+++ b/Documentation/git-export.txt
@@ -0,0 +1,31 @@
+git-export(1)
+=============
+v0.1, May 2005
+
+NAME
+----
+git-export - Exports each commit and a diff against each of its parents
+
+
+SYNOPSIS
+--------
+'git-export' top [base]
+
+DESCRIPTION
+-----------
+Exports each commit and diff against each of its parents, between
+top and base. If base is not specified it exports everything.
+
+
+Author
+------
+Written by Linus Torvalds <torvalds@osdl.org>
+
+Documentation
+--------------
+Documentation by David Greaves, Junio C Hamano and the git-list <git@vger.kernel.org>.
+
+GIT
+---
+Part of the link:git.html[git] suite
+
diff --git a/Documentation/git-fetch-pack.txt b/Documentation/git-fetch-pack.txt
new file mode 100644
index 0000000..884a26b
--- /dev/null
+++ b/Documentation/git-fetch-pack.txt
@@ -0,0 +1,73 @@
+git-fetch-pack(1)
+=================
+v0.1, July 2005
+
+NAME
+----
+git-fetch-pack - Receive missing objects from another repository.
+
+
+SYNOPSIS
+--------
+git-fetch-pack [-q] [--exec=<git-upload-pack>] [<host>:]<directory> [<head>...] < <commit-list>
+
+DESCRIPTION
+-----------
+Invokes 'git-upload-pack' on a potentially remote repository,
+and asks it to send objects missing from this repository, to
+update the named heads. The list of commits available locally
+is fed from the standard input, to be sent to 'git-upload-pack'
+running on the other end.
+
+This command can be used only when the local side has a common
+(ancestor) commit with the remote head that is being pulled
+from. When there is no such common commit, use 'git-clone-pack' instead.
+
+
+OPTIONS
+-------
+-q::
+ Pass '-q' flag to 'git-unpack-objects'; this makes the
+ cloning process less verbose.
+
+--exec=<git-upload-pack>::
+ Use this to specify the path to 'git-upload-pack' on the
+ remote side, if it is not found on your $PATH.
+ Installations of sshd ignore the user's environment
+ setup scripts for login shells (e.g. .bash_profile) and
+ your privately installed GIT may not be found on the system
+ default $PATH. Another workaround suggested is to set
+ up your $PATH in ".bashrc", but this flag is for people
+ who do not want to pay the overhead for non-interactive
+ shells by having a lean .bashrc file (they set most of
+ the things up in .bash_profile).
+
+<host>::
+ A remote host that houses the repository. When this
+ part is specified, 'git-upload-pack' is invoked via
+ ssh.
+
+<directory>::
+ The repository to sync from.
+
+<head>...::
+ The remote heads to update from. This is relative to
+ $GIT_DIR (e.g. "HEAD", "refs/heads/master"). When
+ unspecified, update from all heads the remote side has.
+
+ However the program refuses to work if more than one
+ remote head matches the specified heads. I am not sure
+ what this means... Help!!!!!
+
+
+Author
+------
+Written by Linus Torvalds <torvalds@osdl.org>
+
+Documentation
+--------------
+Documentation by Junio C Hamano.
+
+GIT
+---
+Part of the link:git.html[git] suite
diff --git a/Documentation/git-fetch-script.txt b/Documentation/git-fetch-script.txt
new file mode 100644
index 0000000..937df05
--- /dev/null
+++ b/Documentation/git-fetch-script.txt
@@ -0,0 +1,47 @@
+git-fetch-script(1)
+===================
+v0.1, July 2005
+
+NAME
+----
+git-fetch-script - Download objects and a head from another repository.
+
+
+SYNOPSIS
+--------
+'git-fetch-script' <repository> [ <head> | tag <tag> ]
+
+
+DESCRIPTION
+-----------
+Fetches a named head or a tag from another repository, along
+with the objects necessary to complete that head or tag. The
+head to pull defaults to HEAD if unspecified. The head or tag
+fetched from the remote repository is stored in
+$GIT_DIR/FETCH_HEAD.
+
+When a <tag> is specified, the <tag> fetched from the remote is
+also copied to the local $GIT_DIR/tags/<tag> file. When no
+<head> nor <tag> is specified, and <repository> was specified
+with the short-hand notation (i.e. naming a file under the
+$GIT_DIR/branches directory), the head fetched from the remote
+repository is also copied to the local $GIT_DIR/heads/<repository>
+file.
+
+
+OPTIONS
+-------
+include::pull-fetch-param.txt[]
+
+
+Author
+------
+Written by Linus Torvalds <torvalds@osdl.org> and Junio C Hamano <junkio@cox.net>
+
+Documentation
+--------------
+Documentation by David Greaves, Junio C Hamano and the git-list <git@vger.kernel.org>.
+
+GIT
+---
+Part of the link:git.html[git] suite
diff --git a/Documentation/git-fsck-cache.txt b/Documentation/git-fsck-cache.txt
new file mode 100644
index 0000000..aaec2de
--- /dev/null
+++ b/Documentation/git-fsck-cache.txt
@@ -0,0 +1,149 @@
+git-fsck-cache(1)
+=================
+v0.1, May 2005
+
+NAME
+----
+git-fsck-cache - Verifies the connectivity and validity of the objects in the database
+
+
+SYNOPSIS
+--------
+'git-fsck-cache' [--tags] [--root] [--unreachable] [--cache] [--standalone | --full] [--strict] [<object>*]
+
+DESCRIPTION
+-----------
+Verifies the connectivity and validity of the objects in the database.
+
+OPTIONS
+-------
+<object>::
+ An object to treat as the head of an unreachability trace.
+
+ If no objects are given, git-fsck-cache defaults to using the
+ index file and all SHA1 references in .git/refs/* as heads.
+
+--unreachable::
+ Print out objects that exist but that aren't reachable from any
+ of the reference nodes.
+
+--root::
+ Report root nodes.
+
+--tags::
+ Report tags.
+
+--cache::
+ Consider any object recorded in the cache also as a head node for
+ an unreachability trace.
+
+--standalone::
+ Limit checks to the contents of GIT_OBJECT_DIRECTORY
+ (.git/objects), making sure that it is consistent and
+ complete without referring to objects found in alternate
+ object pools listed in GIT_ALTERNATE_OBJECT_DIRECTORIES,
+ nor packed GIT archives found in .git/objects/pack;
+ cannot be used with --full.
+
+--full::
+ Check not just objects in GIT_OBJECT_DIRECTORY
+ (.git/objects), but also the ones found in alternate
+ object pools listed in GIT_ALTERNATE_OBJECT_DIRECTORIES,
+ and in packed GIT archives found in .git/objects/pack
+ and corresponding pack subdirectories in alternate
+ object pools; cannot be used with --standalone.
+
+--strict::
+ Enable more strict checking, namely to catch a file mode
+ recorded with g+w bit set, which was created by older
+ versions of GIT. Existing repositories, including the
+ Linux kernel, GIT itself, and sparse repository have old
+ objects that trigger this check, but it is recommended
+ to check new projects with this flag.
+
+It tests SHA1 and general object sanity, and it does full tracking of
+the resulting reachability and everything else. It prints out any
+corruption it finds (missing or bad objects), and if you use the
+'--unreachable' flag it will also print out objects that exist but
+that aren't reachable from any of the specified head nodes.
+
+So for example
+
+ git-fsck-cache --unreachable $(cat .git/HEAD)
+
+or, for Cogito users:
+
+ git-fsck-cache --unreachable $(cat .git/refs/heads/*)
+
+will do quite a _lot_ of verification on the tree. There are a few
+extra validity tests to be added (make sure that tree objects are
+sorted properly etc), but on the whole if "git-fsck-cache" is happy, you
+do have a valid tree.
+
+Any corrupt objects you will have to find in backups or other archives
+(ie you can just remove them and do an "rsync" with some other site in
+the hopes that somebody else has the object you have corrupted).
+
+Of course, "valid tree" doesn't mean that it wasn't generated by some
+evil person, and the end result might be crap. Git is a revision
+tracking system, not a quality assurance system ;)
+
+Extracted Diagnostics
+---------------------
+
+expect dangling commits - potential heads - due to lack of head information::
+ You haven't specified any nodes as heads so it won't be
+ possible to differentiate between un-parented commits and
+ root nodes.
+
+missing sha1 directory '<dir>'::
+ The directory holding the sha1 objects is missing.
+
+unreachable <type> <object>::
+ The <type> object <object>, isn't actually referred to directly
+ or indirectly in any of the trees or commits seen. This can
+ mean that there's another root node that you're not specifying
+ or that the tree is corrupt. If you haven't missed a root node
+ then you might as well delete unreachable nodes since they
+ can't be used.
+
+missing <type> <object>::
+ The <type> object <object>, is referred to but isn't present in
+ the database.
+
+dangling <type> <object>::
+ The <type> object <object>, is present in the database but never
+ 'directly' used. A dangling commit could be a root node.
+
+warning: git-fsck-cache: tree <tree> has full pathnames in it::
+ And it shouldn't...
+
+sha1 mismatch <object>::
+ The database has an object whose sha1 doesn't match the
+ database value.
+ This indicates a serious data integrity problem.
+ (note: this error occurred during early git development when
+ the database format changed.)
+
+Environment Variables
+---------------------
+
+GIT_OBJECT_DIRECTORY::
+ used to specify the object database root (usually .git/objects)
+
+GIT_INDEX_FILE::
+ used to specify the cache
+
+
+Author
+------
+Written by Linus Torvalds <torvalds@osdl.org>
+
+Documentation
+--------------
+Documentation by David Greaves, Junio C Hamano and the git-list <git@vger.kernel.org>.
+
+GIT
+---
+Part of the link:git.html[git] suite
+
diff --git a/Documentation/git-hash-object.txt b/Documentation/git-hash-object.txt
new file mode 100644
index 0000000..8790d19
--- /dev/null
+++ b/Documentation/git-hash-object.txt
@@ -0,0 +1,36 @@
+git-hash-object(1)
+==================
+v0.1, May 2005
+
+NAME
+----
+git-hash-object - Computes object ID and optionally creates a blob from a file.
+
+
+SYNOPSIS
+--------
+'git-hash-object' [-t <type>] [-w] <any-file-on-the-filesystem>
+
+DESCRIPTION
+-----------
+Computes the object ID value for an object with specified type
+with the contents of the named file (which can be outside of the
+work tree), and optionally writes the resulting object into the
+object database. Reports its object ID to its standard output.
+This is used by "git-cvsimport-script" to update the cache
+without modifying files in the work tree. When <type> is not
+specified, it defaults to "blob".
+
+
+Author
+------
+Written by Junio C Hamano <junkio@cox.net>
+
+Documentation
+--------------
+Documentation by David Greaves, Junio C Hamano and the git-list <git@vger.kernel.org>.
+
+GIT
+---
+Part of the link:git.html[git] suite
+
diff --git a/Documentation/git-http-pull.txt b/Documentation/git-http-pull.txt
new file mode 100644
index 0000000..431ef7c
--- /dev/null
+++ b/Documentation/git-http-pull.txt
@@ -0,0 +1,41 @@
+git-http-pull(1)
+================
+v0.1, May 2005
+
+NAME
+----
+git-http-pull - Downloads a remote GIT repository via HTTP
+
+
+SYNOPSIS
+--------
+'git-http-pull' [-c] [-t] [-a] [-d] [-v] [-w filename] [--recover] commit-id url
+
+DESCRIPTION
+-----------
+Downloads a remote GIT repository via HTTP.
+
+-c::
+ Get the commit objects.
+-t::
+ Get trees associated with the commit objects.
+-a::
+ Get all the objects.
+-v::
+ Report what is downloaded.
+-w::
+ Writes the commit-id into the filename under $GIT_DIR/refs/ on
+ the local end after the transfer is complete.
+
+Author
+------
+Written by Linus Torvalds <torvalds@osdl.org>
+
+Documentation
+--------------
+Documentation by David Greaves, Junio C Hamano and the git-list <git@vger.kernel.org>.
+
+GIT
+---
+Part of the link:git.html[git] suite
+
diff --git a/Documentation/git-init-db.txt b/Documentation/git-init-db.txt
new file mode 100644
index 0000000..99f96f7
--- /dev/null
+++ b/Documentation/git-init-db.txt
@@ -0,0 +1,40 @@
+git-init-db(1)
+==============
+v0.1, May 2005
+
+NAME
+----
+git-init-db - Creates an empty git object database
+
+
+SYNOPSIS
+--------
+'git-init-db'
+
+DESCRIPTION
+-----------
+This simply creates an empty git object database - basically a `.git`
+directory and `.git/objects/??/` directories.
+
+If the 'GIT_DIR' environment variable is set then it specifies a path
+to use instead of `./.git` for the base of the repository.
+
+If the object storage directory is specified via the 'GIT_OBJECT_DIRECTORY'
+environment variable then the sha1 directories are created underneath -
+otherwise the default `.git/objects` directory is used.
+
+"git-init-db" won't hurt an existing repository.
+
+
+Author
+------
+Written by Linus Torvalds <torvalds@osdl.org>
+
+Documentation
+--------------
+Documentation by David Greaves, Junio C Hamano and the git-list <git@vger.kernel.org>.
+
+GIT
+---
+Part of the link:git.html[git] suite
+
diff --git a/Documentation/git-local-pull.txt b/Documentation/git-local-pull.txt
new file mode 100644
index 0000000..cb95334
--- /dev/null
+++ b/Documentation/git-local-pull.txt
@@ -0,0 +1,43 @@
+git-local-pull(1)
+=================
+v0.1, May 2005
+
+NAME
+----
+git-local-pull - Duplicates another GIT repository on a local system
+
+
+SYNOPSIS
+--------
+'git-local-pull' [-c] [-t] [-a] [-d] [-v] [-w filename] [--recover] [-l] [-s] [-n] commit-id path
+
+DESCRIPTION
+-----------
+Duplicates another GIT repository on a local system.
+
+OPTIONS
+-------
+-c::
+ Get the commit objects.
+-t::
+ Get trees associated with the commit objects.
+-a::
+ Get all the objects.
+-v::
+ Report what is downloaded.
+-w::
+ Writes the commit-id into the filename under $GIT_DIR/refs/ on
+ the local end after the transfer is complete.
+
+Author
+------
+Written by Junio C Hamano <junkio@cox.net>
+
+Documentation
+--------------
+Documentation by David Greaves, Junio C Hamano and the git-list <git@vger.kernel.org>.
+
+GIT
+---
+Part of the link:git.html[git] suite
+
diff --git a/Documentation/git-ls-files.txt b/Documentation/git-ls-files.txt
new file mode 100644
index 0000000..7ac6c7d
--- /dev/null
+++ b/Documentation/git-ls-files.txt
@@ -0,0 +1,196 @@
+git-ls-files(1)
+===============
+v0.1, May 2005
+
+NAME
+----
+git-ls-files - Information about files in the cache/working directory
+
+
+SYNOPSIS
+--------
+'git-ls-files' [-z] [-t]
+ (--[cached|deleted|others|ignored|stage|unmerged|killed])\*
+ (-[c|d|o|i|s|u|k])\*
+ [-x <pattern>|--exclude=<pattern>]
+ [-X <file>|--exclude-from=<file>]
+ [--exclude-per-directory=<file>]
+
+DESCRIPTION
+-----------
+This merges the file listing in the directory cache index with the
+actual working directory list, and shows different combinations of the
+two.
+
+One or more of the options below may be used to determine the files
+shown:
+
+OPTIONS
+-------
+-c|--cached::
+ Show cached files in the output (default)
+
+-d|--deleted::
+ Show deleted files in the output
+
+-o|--others::
+ Show other files in the output
+
+-i|--ignored::
+ Show ignored files in the output
+ Note that this also reverses any exclude list present.
+
+-s|--stage::
+ Show stage files in the output
+
+-u|--unmerged::
+ Show unmerged files in the output (forces --stage)
+
+-k|--killed::
+ Show files on the filesystem that need to be removed due
+ to file/directory conflicts for checkout-cache to
+ succeed.
+
+-z::
+ \0 line termination on output
+
+-x|--exclude=<pattern>::
+ Skips files matching pattern.
+ Note that pattern is a shell wildcard pattern.
+
+-X|--exclude-from=<file>::
+ exclude patterns are read from <file>; 1 per line.
+
+--exclude-per-directory=<file>::
+ read additional exclude patterns that apply only to the
+ directory and its subdirectories in <file>.
+
+-t::
+ Identify the file status with the following tags (followed by
+ a space) at the start of each line:
+ H cached
+ M unmerged
+ R removed/deleted
+ K to be killed
+ ? other
+
+Output
+------
+'git-ls-files' just outputs the filename unless '--stage' is specified in
+which case it outputs:
+
+ [<tag> ]<mode> <object> <stage> <file>
+
+"git-ls-files --unmerged" and "git-ls-files --stage" can be used to examine
+detailed information on unmerged paths.
+
+For an unmerged path, instead of recording a single mode/SHA1 pair,
+the dircache records up to three such pairs; one from tree O in stage
+1, A in stage 2, and B in stage 3. This information can be used by
+the user (or Cogito) to see what should eventually be recorded at the
+path. (see read-cache for more information on state)
+
+
+Exclude Patterns
+----------------
+
+'git-ls-files' can use a list of "exclude patterns" when
+traversing the directory tree and finding files to show when the
+flags --others or --ignored are specified.
+
+These exclude patterns come from these places:
+
+ (1) command line flag --exclude=<pattern> specifies a single
+ pattern.
+
+ (2) command line flag --exclude-from=<file> specifies a list of
+ patterns stored in a file.
+
+ (3) command line flag --exclude-per-directory=<name> specifies
+ a name of the file in each directory 'git-ls-files'
+ examines, and if exists, its contents are used as an
+ additional list of patterns.
+
+An exclude pattern file used by (2) and (3) contains one pattern
+per line. A line that starts with a '#' can be used as comment
+for readability.
+
+The list of patterns that is in effect at a given time is
+built and ordered in the following way:
+
+ * --exclude=<pattern> and lines read from --exclude-from=<file>
+ come at the beginning of the list of patterns, in the order
+ given on the command line. Patterns that come from the file
+ specified with --exclude-from are ordered in the same order
+ as they appear in the file.
+
+ * When --exclude-per-directory=<name> is specified, upon
+ entering a directory that has such a file, its contents are
+ appended at the end of the current "list of patterns". They
+ are popped off when leaving the directory.
+
+Each pattern in the pattern list specifies "a match pattern" and
+optionally the fate --- either a file that matches the pattern
+is considered excluded or included. By default, this being
+"exclude" mechanism, the fate is "excluded". A filename is
+examined against the patterns in the list, and the first match
+determines its fate.
+
+A pattern specified on the command line with --exclude or read
+from the file specified with --exclude-from is relative to the
+top of the directory tree. A pattern read from a file specified
+by --exclude-per-directory is relative to the directory that the
+pattern file appears in.
+
+An exclude pattern is of the following format:
+
+ - an optional prefix '!' which means that the fate this pattern
+ specifies is "include", not the usual "exclude"; the
+ remainder of the pattern string is interpreted according to
+ the following rules.
+
+ - if it does not contain a slash '/', it is a shell glob
+ pattern and used to match against the filename without
+ leading directories (i.e. the same way as the current
+ implementation).
+
+ - otherwise, it is a shell glob pattern, suitable for
+ consumption by fnmatch(3) with FNM_PATHNAME flag. I.e. a
+ slash in the pattern must match a slash in the pathname.
+ "Documentation/*.html" matches "Documentation/git.html" but
+ not "ppc/ppc.html". As a natural exception, "/*.c" matches
+ "cat-file.c" but not "mozilla-sha1/sha1.c".
+
+An example:
+
+ $ cat .git/ignore
+ # ignore objects and archives, anywhere in the tree.
+ *.[oa]
+ $ cat Documentation/.gitignore
+ # ignore generated html files,
+ # except foo.html which is maintained by hand
+ !foo.html
+ *.html
+ $ git-ls-files --ignored \
+ --exclude='Documentation/*.[0-9]' \
+ --exclude-from=.git/ignore \
+ --exclude-per-directory=.gitignore
+
+
+See Also
+--------
+link:read-cache.html[read-cache]
+
+
+Author
+------
+Written by Linus Torvalds <torvalds@osdl.org>
+
+Documentation
+--------------
+Documentation by David Greaves, Junio C Hamano and the git-list <git@vger.kernel.org>.
+
+GIT
+---
+Part of the link:git.html[git] suite
+
diff --git a/Documentation/git-ls-tree.txt b/Documentation/git-ls-tree.txt
new file mode 100644
index 0000000..958b56d
--- /dev/null
+++ b/Documentation/git-ls-tree.txt
@@ -0,0 +1,55 @@
+git-ls-tree(1)
+==============
+v0.1, May 2005
+
+NAME
+----
+git-ls-tree - Lists the contents of a tree object.
+
+
+SYNOPSIS
+--------
+'git-ls-tree' [-d] [-r] [-z] <tree-ish> [paths...]
+
+DESCRIPTION
+-----------
+Lists the contents of a tree object, like what "/bin/ls -a" does
+in the current working directory.
+
+OPTIONS
+-------
+<tree-ish>::
+ Id of a tree.
+
+-d::
+ show only the named tree entry itself, not its children
+
+-r::
+ recurse into sub-trees
+
+-z::
+ \0 line termination on output
+
+paths::
+ When paths are given, shows them. Otherwise implicitly
+ uses the root level of the tree as the sole path argument.
+
+
+Output Format
+-------------
+ <mode> SP <type> SP <object> TAB <file>
+
+
+Author
+------
+Written by Linus Torvalds <torvalds@osdl.org>
+Completely rewritten from scratch by Junio C Hamano <junkio@cox.net>
+
+Documentation
+--------------
+Documentation by David Greaves, Junio C Hamano and the git-list <git@vger.kernel.org>.
+
+GIT
+---
+Part of the link:git.html[git] suite
+
diff --git a/Documentation/git-merge-base.txt b/Documentation/git-merge-base.txt
new file mode 100644
index 0000000..1e27bf2
--- /dev/null
+++ b/Documentation/git-merge-base.txt
@@ -0,0 +1,34 @@
+git-merge-base(1)
+=================
+v0.1, May 2005
+
+NAME
+----
+git-merge-base - Finds as good a common ancestor as possible for a merge
+
+
+SYNOPSIS
+--------
+'git-merge-base' <commit> <commit>
+
+DESCRIPTION
+-----------
+"git-merge-base" finds as good a common ancestor as possible. Given a
+selection of equally good common ancestors it should not be relied on
+to decide in any particular way.
+
+The "git-merge-base" algorithm is still in flux - use the source...
+
+
+Author
+------
+Written by Linus Torvalds <torvalds@osdl.org>
+
+Documentation
+--------------
+Documentation by David Greaves, Junio C Hamano and the git-list <git@vger.kernel.org>.
+
+GIT
+---
+Part of the link:git.html[git] suite
+
diff --git a/Documentation/git-merge-cache.txt b/Documentation/git-merge-cache.txt
new file mode 100644
index 0000000..3fb4d49
--- /dev/null
+++ b/Documentation/git-merge-cache.txt
@@ -0,0 +1,84 @@
+git-merge-cache(1)
+==================
+v0.1, May 2005
+
+NAME
+----
+git-merge-cache - Runs a merge for files needing merging
+
+
+SYNOPSIS
+--------
+'git-merge-cache' [-o] <merge-program> (-a | -- | <file>\*)
+
+DESCRIPTION
+-----------
+This looks up the <file>(s) in the cache and, if there are any merge
+entries, passes the SHA1 hash for those files as arguments 1, 2, 3 (empty
+argument if no file), and <file> as argument 4. File modes for the three
+files are passed as arguments 5, 6 and 7.
+
+OPTIONS
+-------
+--::
+ Interpret all future arguments as filenames.
+
+-a::
+ Run merge against all files in the cache that need merging.
+
+-o::
+ Instead of stopping at the first failed merge, do all of them
+ in one shot - continue with merging even when previous merges
+ returned errors, and only return the error code after all the
+ merges are over.
+
+If "git-merge-cache" is called with multiple <file>s (or -a) then it
+processes them in turn only stopping if merge returns a non-zero exit
+code.
+
+Typically this is run with a script calling the merge command from
+the RCS package.
+
+A sample script called "git-merge-one-file-script" is included in the
+distribution.
+
+ALERT ALERT ALERT! The git "merge object order" is different from the
+RCS "merge" program merge object order. In the above ordering, the
+original is first. But the argument order to the 3-way merge program
+"merge" is to have the original in the middle. Don't ask me why.
+
+Examples:
+
+ torvalds@ppc970:~/merge-test> git-merge-cache cat MM
+ This is MM from the original tree. # original
+ This is modified MM in the branch A. # merge1
+ This is modified MM in the branch B. # merge2
+ This is modified MM in the branch B. # current contents
+
+or
+
+ torvalds@ppc970:~/merge-test> git-merge-cache cat AA MM
+ cat: : No such file or directory
+ This is added AA in the branch A.
+ This is added AA in the branch B.
+ This is added AA in the branch B.
+ fatal: merge program failed
+
+where the latter example shows how "git-merge-cache" will stop trying to
+merge once anything has returned an error (ie "cat" returned an error
+for the AA file, because it didn't exist in the original, and thus
+"git-merge-cache" didn't even try to merge the MM thing).
+
+Author
+------
+Written by Linus Torvalds <torvalds@osdl.org>
+One-shot merge by Petr Baudis <pasky@ucw.cz>
+
+Documentation
+--------------
+Documentation by David Greaves, Junio C Hamano and the git-list <git@vger.kernel.org>.
+
+GIT
+---
+Part of the link:git.html[git] suite
+
diff --git a/Documentation/git-merge-one-file-script.txt b/Documentation/git-merge-one-file-script.txt
new file mode 100644
index 0000000..387601d
--- /dev/null
+++ b/Documentation/git-merge-one-file-script.txt
@@ -0,0 +1,30 @@
+git-merge-one-file-script(1)
+============================
+v0.1, May 2005
+
+NAME
+----
+git-merge-one-file-script - The standard helper program to use with "git-merge-cache"
+
+
+SYNOPSIS
+--------
+'git-merge-one-file-script'
+
+DESCRIPTION
+-----------
+This is the standard helper program to use with "git-merge-cache"
+to resolve a merge after the trivial merge done with "git-read-tree -m".
+
+Author
+------
+Written by Linus Torvalds <torvalds@osdl.org>
+
+Documentation
+--------------
+Documentation by David Greaves, Junio C Hamano and the git-list <git@vger.kernel.org>.
+
+GIT
+---
+Part of the link:git.html[git] suite
+
diff --git a/Documentation/git-mktag.txt b/Documentation/git-mktag.txt
new file mode 100644
index 0000000..708f4ef
--- /dev/null
+++ b/Documentation/git-mktag.txt
@@ -0,0 +1,48 @@
+git-mktag(1)
+============
+v0.1, May 2005
+
+NAME
+----
+git-mktag - Creates a tag object
+
+
+SYNOPSIS
+--------
+'git-mktag' < signature_file
+
+DESCRIPTION
+-----------
+Reads a tag's contents from standard input and creates a tag object
+that can also be used to sign other objects.
+
+The output is the new tag's <object> identifier.
+
+Tag Format
+----------
+A tag signature file has a very simple fixed format: three lines of
+
+ object <sha1>
+ type <typename>
+ tag <tagname>
+
+followed by some 'optional' free-form signature that git itself
+doesn't care about, but that can be verified with gpg or similar.
+
+The size of the full object is artificially limited to 8kB. (Just
+because I'm a lazy bastard, and if you can't fit a signature in that
+size, you're doing something wrong)
+
+
+Author
+------
+Written by Linus Torvalds <torvalds@osdl.org>
+
+Documentation
+--------------
+Documentation by David Greaves, Junio C Hamano and the git-list <git@vger.kernel.org>.
+
+GIT
+---
+Part of the link:git.html[git] suite
+
diff --git a/Documentation/git-pack-objects.txt b/Documentation/git-pack-objects.txt
new file mode 100644
index 0000000..ae6e88d
--- /dev/null
+++ b/Documentation/git-pack-objects.txt
@@ -0,0 +1,80 @@
+git-pack-objects(1)
+===================
+v0.1, July 2005
+
+NAME
+----
+git-pack-objects - Create a packed archive of objects.
+
+
+SYNOPSIS
+--------
+'git-pack-objects' [--incremental] [--window=N] [--depth=N] {--stdout | base-name} < object-list
+
+
+DESCRIPTION
+-----------
+Reads list of objects from the standard input, and writes a packed
+archive with specified base-name, or to the standard output.
+
+A packed archive is an efficient way to transfer a set of objects
+between two repositories, and also is an archival format which
+is efficient to access. The packed archive format (.pack) is
+designed to be unpackable without having anything else, but for
+random access, accompanied with the pack index file (.idx).
+
+'git-unpack-objects' command can read the packed archive and
+expand the objects contained in the pack into "one-file
+one-object" format; this is typically done by the smart-pull
+commands when a pack is created on-the-fly for efficient network
+transport by their peers.
+
+Placing both in pack subdirectory of $GIT_OBJECT_DIRECTORY (or
+any of the directories on $GIT_ALTERNATE_OBJECT_DIRECTORIES)
+enables GIT to read from such an archive.
+
+
+OPTIONS
+-------
+base-name::
+ Write into a pair of files (.pack and .idx), using
+ <base-name> to determine the name of the created file.
+ When this option is used, the two files are written in
+ <base-name>-<SHA1>.{pack,idx} files. <SHA1> is a hash
+ of object names (currently in random order so it does
+ not have any useful meaning) to make the resulting
+ filename reasonably unique, and written to the standard
+ output of the command.
+
+--stdout::
+ Write the pack contents (what would have been written to
+ .pack file) out to the standard output.
+
+--window and --depth::
+ These two options affect how the objects contained in
+ the pack are stored using delta compression. The
+ objects are first internally sorted by type, size and
+ optionally names and compared against the other objects
+ within --window to see if using delta compression saves
+ space. --depth limits the maximum delta depth; making
+ it too deep affects the performance on the unpacker
+ side, because delta data needs to be applied that many
+ times to get to the necessary object.
+
+--incremental::
+ This flag causes an object already in a pack to be ignored
+ even if it appears in the standard input.
+
+
+Author
+------
+Written by Linus Torvalds <torvalds@osdl.org>
+
+Documentation
+-------------
+Documentation by Junio C Hamano
+
+GIT
+---
+Part of the link:git.html[git] suite
+
diff --git a/Documentation/git-peek-remote.txt b/Documentation/git-peek-remote.txt
new file mode 100644
index 0000000..ab783c1
--- /dev/null
+++ b/Documentation/git-peek-remote.txt
@@ -0,0 +1,53 @@
+git-peek-remote(1)
+==================
+v0.1, July 2005
+
+NAME
+----
+git-peek-remote - Lists the references on a remote repository.
+
+
+SYNOPSIS
+--------
+'git-peek-remote' [--exec=<git-upload-pack>] [<host>:]<directory>
+
+DESCRIPTION
+-----------
+Lists the references the remote repository has, and optionally
+stores them in the local repository under the same name.
+
+OPTIONS
+-------
+--exec=<git-upload-pack>::
+ Use this to specify the path to 'git-upload-pack' on the
+ remote side, if it is not found on your $PATH.
+ Installations of sshd ignore the user's environment
+ setup scripts for login shells (e.g. .bash_profile) and
+ your privately installed GIT may not be found on the system
+ default $PATH. Another workaround suggested is to set
+ up your $PATH in ".bashrc", but this flag is for people
+ who do not want to pay the overhead for non-interactive
+ shells by having a lean .bashrc file (they set most of
+ the things up in .bash_profile).
+
+<host>::
+ A remote host that houses the repository. When this
+ part is specified, 'git-upload-pack' is invoked via
+ ssh.
+
+<directory>::
+ The repository to sync from.
+
+
+Author
+------
+Written by Junio C Hamano <junkio@cox.net>
+
+Documentation
+--------------
+Documentation by Junio C Hamano.
+
+GIT
+---
+Part of the link:git.html[git] suite
+
diff --git a/Documentation/git-prune-script.txt b/Documentation/git-prune-script.txt
new file mode 100644
index 0000000..537b790
--- /dev/null
+++ b/Documentation/git-prune-script.txt
@@ -0,0 +1,32 @@
+git-prune-script(1)
+===================
+v0.1, May 2005
+
+NAME
+----
+git-prune-script - Prunes all unreachable objects from the object database
+
+
+SYNOPSIS
+--------
+'git-prune-script'
+
+DESCRIPTION
+-----------
+This runs "git-fsck-cache --unreachable" program using the heads specified
+on the command line (or `.git/refs/heads/\*` and `.git/refs/tags/\*` if none is
+specified), and prunes all unreachable objects from the object database.
+
+
+Author
+------
+Written by Linus Torvalds <torvalds@osdl.org>
+
+Documentation
+--------------
+Documentation by David Greaves, Junio C Hamano and the git-list <git@vger.kernel.org>.
+
+GIT
+---
+Part of the link:git.html[git] suite
+
diff --git a/Documentation/git-pull-script.txt b/Documentation/git-pull-script.txt
new file mode 100644
index 0000000..ec1e7a2
--- /dev/null
+++ b/Documentation/git-pull-script.txt
@@ -0,0 +1,37 @@
+git-pull-script(1)
+==================
+v0.1, May 2005
+
+NAME
+----
+git-pull-script - Pull and merge from another repository.
+
+
+SYNOPSIS
+--------
+'git-pull-script' <repository> [ <head> | tag <tag> ]
+
+
+DESCRIPTION
+-----------
+Runs 'git-fetch-script' with the given parameters, then
+'git-resolve-script' to merge the local HEAD and FETCH_HEAD.
+
+
+OPTIONS
+-------
+include::pull-fetch-param.txt[]
+
+
+Author
+------
+Written by Linus Torvalds <torvalds@osdl.org> and Junio C Hamano <junkio@cox.net>
+
+Documentation
+--------------
+Documentation by David Greaves, Junio C Hamano and the git-list <git@vger.kernel.org>.
+
+GIT
+---
+Part of the link:git.html[git] suite
+
diff --git a/Documentation/git-read-tree.txt b/Documentation/git-read-tree.txt
new file mode 100644
index 0000000..7665946
--- /dev/null
+++ b/Documentation/git-read-tree.txt
@@ -0,0 +1,268 @@
+git-read-tree(1)
+================
+v0.1, May 2005
+
+NAME
+----
+git-read-tree - Reads tree information into the directory cache
+
+
+SYNOPSIS
+--------
+'git-read-tree' (<tree-ish> | [-m [-u]] <tree-ish1> [<tree-ish2> [<tree-ish3>]])
+
+
+DESCRIPTION
+-----------
+Reads the tree information given by <tree-ish> into the directory cache,
+but does not actually *update* any of the files it "caches". (see:
+git-checkout-cache)
+
+Optionally, it can merge a tree into the cache, perform a
+fast-forward (i.e. 2-way) merge, or a 3-way merge, with the -m
+flag. When used with -m, the -u flag causes it to also update
+the files in the work tree with the result of the merge.
+
+Trivial merges are done by "git-read-tree" itself. Only conflicting paths
+will be in unmerged state when "git-read-tree" returns.
+
+OPTIONS
+-------
+-m::
+ Perform a merge, not just a read.
+
+-u::
+ After a successful merge, update the files in the work
+ tree with the result of the merge.
+
+<tree-ish#>::
+ The id of the tree object(s) to be read/merged.
+
+
+Merging
+-------
+If '-m' is specified, "git-read-tree" can perform 3 kinds of
+merge, a single tree merge if only 1 tree is given, a
+fast-forward merge with 2 trees, or a 3-way merge if 3 trees are
+provided.
+
+
+Single Tree Merge
+~~~~~~~~~~~~~~~~~
+If only 1 tree is specified, git-read-tree operates as if the user did not
+specify '-m', except that if the original cache has an entry for a
+given pathname; and the contents of the path matches with the tree
+being read, the stat info from the cache is used. (In other words, the
+cache's stat()s take precedence over the merged tree's)
+
+That means that if you do a "git-read-tree -m <newtree>" followed by a
+"git-checkout-cache -f -u -a", the "git-checkout-cache" only checks out
+the stuff that really changed.
+
+This is used to avoid unnecessary false hits when "git-diff-files" is
+run after git-read-tree.
+
+
+Two Tree Merge
+~~~~~~~~~~~~~~
+
+Typically, this is invoked as "git-read-tree -m $H $M", where $H
+is the head commit of the current repository, and $M is the head
+of a foreign tree, which is simply ahead of $H (i.e. we are in a
+fast forward situation).
+
+When two trees are specified, the user is telling git-read-tree
+the following:
+
+ (1) The current index and work tree are derived from $H, but
+ the user may have local changes in them since $H;
+
+ (2) The user wants to fast-forward to $M.
+
+In this case, the "git-read-tree -m $H $M" command makes sure
+that no local change is lost as the result of this "merge".
+Here are the "carry forward" rules:
+
+ I (index) H M Result
+ -------------------------------------------------------
+ 0 nothing nothing nothing (does not happen)
+ 1 nothing nothing exists use M
+ 2 nothing exists nothing remove path from cache
+ 3 nothing exists exists use M
+
+ clean I==H I==M
+ ------------------
+ 4 yes N/A N/A nothing nothing keep index
+ 5 no N/A N/A nothing nothing keep index
+
+ 6 yes N/A yes nothing exists keep index
+ 7 no N/A yes nothing exists keep index
+ 8 yes N/A no nothing exists fail
+ 9 no N/A no nothing exists fail
+
+ 10 yes yes N/A exists nothing remove path from cache
+ 11 no yes N/A exists nothing fail
+ 12 yes no N/A exists nothing fail
+ 13 no no N/A exists nothing fail
+
+ clean (H=M)
+ ------
+ 14 yes exists exists keep index
+ 15 no exists exists keep index
+
+ clean I==H I==M (H!=M)
+ ------------------
+ 16 yes no no exists exists fail
+ 17 no no no exists exists fail
+ 18 yes no yes exists exists keep index
+ 19 no no yes exists exists keep index
+ 20 yes yes no exists exists use M
+ 21 no yes no exists exists fail
+
+In all "keep index" cases, the cache entry stays as in the
+original index file. If the entry were not up to date,
+git-read-tree keeps the copy in the work tree intact when
+operating under the -u flag.
+
+When this form of git-read-tree returns successfully, you can
+see what "local changes" you made are carried forward by running
+"git-diff-cache --cached $M". Note that this does not
+necessarily match what "git-diff-cache --cached $H" would have
+produced before such a two tree merge. This is because of cases
+18 and 19 --- if you already had the changes in $M (e.g. maybe
+you picked it up via e-mail in a patch form), "git-diff-cache
+--cached $H" would have told you about the change before this
+merge, but it would not show in "git-diff-cache --cached $M"
+output after two-tree merge.
+
+
+3-Way Merge
+~~~~~~~~~~~
+Each "index" entry has two bits worth of "stage" state. stage 0 is the
+normal one, and is the only one you'd see in any kind of normal use.
+
+However, when you do "git-read-tree" with three trees, the "stage"
+starts out at 1.
+
+This means that you can do
+
+ git-read-tree -m <tree1> <tree2> <tree3>
+
+and you will end up with an index with all of the <tree1> entries in
+"stage1", all of the <tree2> entries in "stage2" and all of the
+<tree3> entries in "stage3".
+
+Furthermore, "git-read-tree" has special-case logic that says: if you see
+a file that matches in all respects in the following states, it
+"collapses" back to "stage0":
+
+ - stage 2 and 3 are the same; take one or the other (it makes no
+ difference - the same work has been done on stage 2 and 3)
+
+ - stage 1 and stage 2 are the same and stage 3 is different; take
+ stage 3 (some work has been done on stage 3)
+
+ - stage 1 and stage 3 are the same and stage 2 is different; take
+ stage 2 (some work has been done on stage 2)
+
+The "git-write-tree" command refuses to write a nonsensical tree, and it
+will complain about unmerged entries if it sees a single entry that is not
+stage 0.
+
+Ok, this all sounds like a collection of totally nonsensical rules,
+but it's actually exactly what you want in order to do a fast
+merge. The different stages represent the "result tree" (stage 0, aka
+"merged"), the original tree (stage 1, aka "orig"), and the two trees
+you are trying to merge (stage 2 and 3 respectively).
+
+The order of stages 1, 2 and 3 (hence the order of three
+<tree-ish> command line arguments) is significant when you
+start a 3-way merge with an index file that is already
+populated. Here is an outline of how the algorithm works:
+
+- if a file exists in identical format in all three trees, it will
+ automatically collapse to "merged" state by git-read-tree.
+
+- a file that has _any_ difference what-so-ever in the three trees
+ will stay as separate entries in the index. It's up to "script
+ policy" to determine how to remove the non-0 stages, and insert a
+ merged version.
+
+- the index file saves and restores with all this information, so you
+ can merge things incrementally, but as long as it has entries in
+ stages 1/2/3 (ie "unmerged entries") you can't write the result. So
+ now the merge algorithm ends up being really simple:
+
+ * you walk the index in order, and ignore all entries of stage 0,
+ since they've already been done.
+
+ * if you find a "stage1", but no matching "stage2" or "stage3", you
+ know it's been removed from both trees (it only existed in the
+ original tree), and you remove that entry.
+
+ * if you find a matching "stage2" and "stage3" tree, you remove one
+ of them, and turn the other into a "stage0" entry. Remove any
+ matching "stage1" entry if it exists too. .. all the normal
+ trivial rules ..
+
+You would normally use "git-merge-cache" with supplied
+"git-merge-one-file-script" to do this last step. The script
+does not touch the files in the work tree, and the entire merge
+happens in the index file. In other words, there is no need to
+worry about what is in the working directory, since it is never
+shown and never used.
+
+When you start a 3-way merge with an index file that is already
+populated, it is assumed that it represents the state of the
+files in your work tree, and you can even have files with
+changes unrecorded in the index file. It is further assumed
+that this state is "derived" from the stage 2 tree. The 3-way
+merge refuses to run if it finds an entry in the original index
+file that does not match stage 2.
+
+This is done to prevent you from losing your work-in-progress
+changes. To illustrate, suppose you start from what has been
+committed last to your repository:
+
+ $ JC=`cat .git/HEAD`
+ $ git-checkout-cache -f -u -a $JC
+
+You do random edits, without running git-update-cache. And then
+you notice that the tip of your "upstream" tree has advanced
+since you pulled from him:
+
+ $ git-fetch-script rsync://.... linus
+ $ LT=`cat .git/MERGE_HEAD`
+
+Your work tree is still based on your HEAD ($JC), but you have
+some edits since. Three-way merge makes sure that you have not
+added or modified cache entries since $JC, and if you haven't,
+then does the right thing. So with the following sequence:
+
+ $ git-read-tree -m -u `git-merge-base $JC $LT` $JC $LT
+ $ git-merge-cache git-merge-one-file-script -a
+ $ echo "Merge with Linus" | \
+ git-commit-tree `git-write-tree` -p $JC -p $LT
+
+what you would commit is a pure merge between $JC and $LT without
+your work-in-progress changes, and your work tree would be
+updated to the result of the merge.
+
+
+See Also
+--------
+link:git-write-tree.html[git-write-tree]; link:git-ls-files.html[git-ls-files]
+
+
+Author
+------
+Written by Linus Torvalds <torvalds@osdl.org>
+
+Documentation
+--------------
+Documentation by David Greaves, Junio C Hamano and the git-list <git@vger.kernel.org>.
+
+GIT
+---
+Part of the link:git.html[git] suite
+
diff --git a/Documentation/git-receive-pack.txt b/Documentation/git-receive-pack.txt
new file mode 100644
index 0000000..d3a835d
--- /dev/null
+++ b/Documentation/git-receive-pack.txt
@@ -0,0 +1,41 @@
+git-receive-pack(1)
+===================
+v0.1, July 2005
+
+NAME
+----
+git-receive-pack - Receive what is pushed into it
+
+
+SYNOPSIS
+--------
+'git-receive-pack' <directory>
+
+DESCRIPTION
+-----------
+Invoked by 'git-send-pack' and updates the repository with the
+information fed from the remote end.
+
+This command is usually not invoked directly by the end user.
+The UI for the protocol is on the 'git-send-pack' side, and the
+program pair is meant to be used to push updates to a remote
+repository. For pull operations, see 'git-fetch-pack' and
+'git-clone-pack'.
+
+
+OPTIONS
+-------
+<directory>::
+ The repository to sync into.
+
+Author
+------
+Written by Linus Torvalds <torvalds@osdl.org>
+
+Documentation
+--------------
+Documentation by Junio C Hamano.
+
+GIT
+---
+Part of the link:git.html[git] suite
diff --git a/Documentation/git-rename-script.txt b/Documentation/git-rename-script.txt
new file mode 100644
index 0000000..1abc68b
--- /dev/null
+++ b/Documentation/git-rename-script.txt
@@ -0,0 +1,34 @@
+
+git-rename-script(1)
+=====================
+v0.1, May 2005
+
+NAME
+----
+git-rename-script - Script used to rename a file, directory or symlink.
+
+
+SYNOPSIS
+--------
+'git-rename-script' <source> <destination>
+
+DESCRIPTION
+-----------
+This script is used to rename a file, directory or symlink.
+
+The index is updated after successful completion, but the change must still be
+committed.
+
+Author
+------
+Written by Linus Torvalds <torvalds@osdl.org>
+Rewritten by Ryan Anderson <ryan@michonline.com>
+
+Documentation
+--------------
+Documentation by David Greaves, Junio C Hamano and the git-list <git@vger.kernel.org>.
+
+GIT
+---
+Part of the link:git.html[git] suite
+
diff --git a/Documentation/git-resolve-script.txt b/Documentation/git-resolve-script.txt
new file mode 100644
index 0000000..8dd84a3
--- /dev/null
+++ b/Documentation/git-resolve-script.txt
@@ -0,0 +1,30 @@
+git-resolve-script(1)
+=====================
+v0.1, May 2005
+
+NAME
+----
+git-resolve-script - Script used to merge two trees
+
+
+SYNOPSIS
+--------
+'git-resolve-script'
+
+DESCRIPTION
+-----------
+This script is used by Linus to merge two trees.
+
+
+Author
+------
+Written by Linus Torvalds <torvalds@osdl.org>
+
+Documentation
+--------------
+Documentation by David Greaves, Junio C Hamano and the git-list <git@vger.kernel.org>.
+
+GIT
+---
+Part of the link:git.html[git] suite
+
diff --git a/Documentation/git-rev-list.txt b/Documentation/git-rev-list.txt
new file mode 100644
index 0000000..fe86c9c
--- /dev/null
+++ b/Documentation/git-rev-list.txt
@@ -0,0 +1,75 @@
+git-rev-list(1)
+===============
+v0.1, May 2005
+
+NAME
+----
+git-rev-list - Lists commit objects in reverse chronological order
+
+
+SYNOPSIS
+--------
+'git-rev-list' [ *--max-count*=number ] [ *--max-age*=timestamp ] [ *--min-age*=timestamp ] [ *--merge-order* [ *--show-breaks* ] ] <commit>
+
+DESCRIPTION
+-----------
+Lists commit objects in reverse chronological order starting at the
+given commit, taking ancestry relationship into account. This is
+useful to produce human-readable log output.
+
+If *--merge-order* is specified, the commit history is decomposed into a
+unique sequence of minimal, non-linear epochs and maximal, linear epochs.
+Non-linear epochs are then linearised by sorting them into merge order, which
+is described below.
+
+Maximal, linear epochs correspond to periods of sequential development.
+Minimal, non-linear epochs correspond to periods of divergent development
+followed by a converging merge. The theory of epochs is described in more
+detail at
+link:http://blackcubes.dyndns.org/epoch/[http://blackcubes.dyndns.org/epoch/].
+
+The merge order for a non-linear epoch is defined as a linearisation for which
+the following invariants are true:
+
+ 1. if a commit P is reachable from commit N, commit P sorts after commit N
+ in the linearised list.
+ 2. if Pi and Pj are any two parents of a merge M (with i < j), then any
+ commit N, such that N is reachable from Pj but not reachable from Pi,
+ sorts before all commits reachable from Pi.
+
+Invariant 1 states that later commits appear before earlier commits they are
+derived from.
+
+Invariant 2 states that commits unique to "later" parents in a merge, appear
+before all commits from "earlier" parents of a merge.
+
+If *--show-breaks* is specified, each item of the list is output with a
+2-character prefix consisting of one of: (|), (^), (=) followed by a space.
+
+Commits marked with (=) represent the boundaries of minimal, non-linear epochs
+and correspond either to the start of a period of divergent development or to
+the end of such a period.
+
+Commits marked with (|) are direct parents of commits immediately preceding
+the marked commit in the list.
+
+Commits marked with (^) are not parents of the immediately preceding commit.
+These "breaks" represent necessary discontinuities implied by trying to
+represent an arbitrary DAG in a linear form.
+
+*--show-breaks* is only valid if *--merge-order* is also specified.
+
+Author
+------
+Written by Linus Torvalds <torvalds@osdl.org>
+
+Original *--merge-order* logic by Jon Seymour <jon.seymour@gmail.com>
+
+Documentation
+--------------
+Documentation by David Greaves, Junio C Hamano and the git-list <git@vger.kernel.org>.
+
+GIT
+---
+Part of the link:git.html[git] suite
+
diff --git a/Documentation/git-rev-tree.txt b/Documentation/git-rev-tree.txt
new file mode 100644
index 0000000..2ec7ed0
--- /dev/null
+++ b/Documentation/git-rev-tree.txt
@@ -0,0 +1,88 @@
+git-rev-tree(1)
+===============
+v0.1, May 2005
+
+NAME
+----
+git-rev-tree - Provides the revision tree for one or more commits
+
+
+SYNOPSIS
+--------
+'git-rev-tree' [--edges] [--cache <cache-file>] [^]<commit> [[^]<commit>]
+
+DESCRIPTION
+-----------
+Provides the revision tree for one or more commits.
+
+OPTIONS
+-------
+--edges::
+ Show edges (ie places where the marking changes between parent
+ and child)
+
+--cache <cache-file>::
+ Use the specified file as a cache from a previous git-rev-list run
+ to speed things up. Note that this "cache" is a totally different
+ concept from the directory index. Also this option is not
+ implemented yet.
+
+[^]<commit>::
+ The commit id to trace (a leading caret means to ignore this
+ commit-id and below)
+
+Output
+------
+
+ <date> <commit>:<flags> [<parent-commit>:<flags> ]\*
+
+<date>::
+ Date in 'seconds since epoch'
+
+<commit>::
+ id of commit object
+
+<parent-commit>::
+ id of each parent commit object (>1 indicates a merge)
+
+<flags>::
+
+ The flags are read as a bitmask representing each commit
+ provided on the commandline. eg: given the command:
+
+ $ git-rev-tree <com1> <com2> <com3>
+
+ The output:
+
+ <date> <commit>:5
+
+ means that <commit> is reachable from <com1>(1) and <com3>(4)
+
+A revtree can get quite large. "git-rev-tree" will eventually allow
+you to cache previous state so that you don't have to follow the whole
+thing down.
+
+So the change difference between two commits is literally
+
+ git-rev-tree [commit-id1] > commit1-revtree
+ git-rev-tree [commit-id2] > commit2-revtree
+ join -t : commit1-revtree commit2-revtree > common-revisions
+
+(this is also how to find the most common parent - you'd look at just
+the head revisions - the ones that aren't referred to by other
+revisions - in "common-revisions", and figure out the best one. I
+think.)
+
+
+Author
+------
+Written by Linus Torvalds <torvalds@osdl.org>
+
+Documentation
+--------------
+Documentation by David Greaves, Junio C Hamano and the git-list <git@vger.kernel.org>.
+
+GIT
+---
+Part of the link:git.html[git] suite
+
diff --git a/Documentation/git-send-pack.txt b/Documentation/git-send-pack.txt
new file mode 100644
index 0000000..245a7d0
--- /dev/null
+++ b/Documentation/git-send-pack.txt
@@ -0,0 +1,46 @@
+git-send-pack(1)
+================
+v0.1, July 2005
+
+NAME
+----
+git-send-pack - Push missing objects packed.
+
+
+SYNOPSIS
+--------
+'git-send-pack' [--exec=<git-receive-pack>] [<host>:]<directory> [<head>...]
+
+DESCRIPTION
+-----------
+Invokes 'git-receive-pack' on a possibly remote repository, and
+updates it from the current repository, sending named heads.
+
+
+OPTIONS
+-------
+--exec=<git-receive-pack>::
+ Path to the 'git-receive-pack' program on the remote
+ end. Sometimes useful when pushing to a remote
+ repository over ssh, and you do not have the program in
+ a directory on the default $PATH.
+
+<host>::
+ A remote host to house the repository. When this
+ part is specified, 'git-receive-pack' is invoked via
+ ssh.
+
+<directory>::
+ The repository to update.
+
+Author
+------
+Written by Linus Torvalds <torvalds@osdl.org>
+
+Documentation
+--------------
+Documentation by Junio C Hamano.
+
+GIT
+---
+Part of the link:git.html[git] suite
diff --git a/Documentation/git-show-index.txt b/Documentation/git-show-index.txt
new file mode 100644
index 0000000..beefe94
--- /dev/null
+++ b/Documentation/git-show-index.txt
@@ -0,0 +1,36 @@
+git-show-index(1)
+=================
+v0.1, July 2005
+
+NAME
+----
+git-show-index - Show packed archive index
+
+
+SYNOPSIS
+--------
+'git-show-index' < idx-file
+
+
+DESCRIPTION
+-----------
+Reads given idx file for packed GIT archive created with
+git-pack-objects command, and dumps its contents.
+
+The information it outputs is a subset of what you can get from
+'git-verify-pack -v'; this command only shows the packfile
+offset and SHA1 of each object.
+
+
+Author
+------
+Written by Linus Torvalds <torvalds@osdl.org>
+
+Documentation
+--------------
+Documentation by Junio C Hamano
+
+GIT
+---
+Part of the link:git.html[git] suite
+
diff --git a/Documentation/git-ssh-pull.txt b/Documentation/git-ssh-pull.txt
new file mode 100644
index 0000000..ac3fb34
--- /dev/null
+++ b/Documentation/git-ssh-pull.txt
@@ -0,0 +1,52 @@
+git-ssh-pull(1)
+===============
+v0.1, May 2005
+
+NAME
+----
+git-ssh-pull - Pulls from a remote repository over ssh connection
+
+
+
+SYNOPSIS
+--------
+'git-ssh-pull' [-c] [-t] [-a] [-d] [-v] [-w filename] [--recover] commit-id url
+
+DESCRIPTION
+-----------
+Pulls from a remote repository over ssh connection, invoking
+git-ssh-push on the other end. It functions identically to
+git-ssh-push, aside from which end you run it on.
+
+
+OPTIONS
+-------
+commit-id::
+ Either the hash or the filename under [URL]/refs/ to
+ pull.
+
+-c::
+ Get the commit objects.
+-t::
+ Get trees associated with the commit objects.
+-a::
+ Get all the objects.
+-v::
+ Report what is downloaded.
+-w::
+ Writes the commit-id into the filename under $GIT_DIR/refs/ on
+ the local end after the transfer is complete.
+
+
+Author
+------
+Written by Daniel Barkalow <barkalow@iabervon.org>
+
+Documentation
+--------------
+Documentation by David Greaves, Junio C Hamano and the git-list <git@vger.kernel.org>.
+
+GIT
+---
+Part of the link:git.html[git] suite
+
diff --git a/Documentation/git-ssh-push.txt b/Documentation/git-ssh-push.txt
new file mode 100644
index 0000000..e38679d
--- /dev/null
+++ b/Documentation/git-ssh-push.txt
@@ -0,0 +1,48 @@
+git-ssh-push(1)
+===============
+v0.1, Jun 2005
+
+NAME
+----
+git-ssh-push - Pushes to a remote repository over ssh connection
+
+
+SYNOPSIS
+--------
+'git-ssh-push' [-c] [-t] [-a] [-d] [-v] [-w filename] [--recover] commit-id url
+
+DESCRIPTION
+-----------
+Pushes to a remote repository over ssh connection, invoking
+git-ssh-pull on the other end. It functions identically to
+git-ssh-pull, aside from which end you run it on.
+
+OPTIONS
+-------
+commit-id::
+ Id of commit to push.
+
+-c::
+ Get the commit objects.
+-t::
+ Get tree associated with the requested commit object.
+-a::
+ Get all the objects.
+-v::
+ Report what is uploaded.
+-w::
+ Writes the commit-id into the filename under [URL]/refs/ on
+ the remote end after the transfer is complete.
+
+Author
+------
+Written by Daniel Barkalow <barkalow@iabervon.org>
+
+Documentation
+--------------
+Documentation by Daniel Barkalow
+
+GIT
+---
+Part of the link:git.html[git] suite
+
diff --git a/Documentation/git-tag-script.txt b/Documentation/git-tag-script.txt
new file mode 100644
index 0000000..2df396e
--- /dev/null
+++ b/Documentation/git-tag-script.txt
@@ -0,0 +1,38 @@
+git-tag-script(1)
+=================
+v0.1, May 2005
+
+NAME
+----
+git-tag-script - Create a tag object signed with GPG
+
+
+
+SYNOPSIS
+--------
+'git-tag-script' [-s | -a] [-f] <name>
+
+DESCRIPTION
+-----------
+Adds a "tag" reference in .git/refs/tags/
+
+Unless "-f" is given, the tag must not yet exist in ".git/refs/tags"
+
+If "-s" or "-a" is passed, the user will be prompted for a tag message,
+and a tag object is created. Otherwise just the SHA1 object
+name of the commit object is written.
+
+A GnuPG signed tag object will be created when "-s" is used.
+
+
+Author
+------
+Written by Linus Torvalds <torvalds@osdl.org>
+
+Documentation
+--------------
+Documentation by David Greaves, Junio C Hamano and the git-list <git@vger.kernel.org>.
+
+GIT
+---
+Part of the link:git.html[git] suite
diff --git a/Documentation/git-tar-tree.txt b/Documentation/git-tar-tree.txt
new file mode 100644
index 0000000..dc8d0fa
--- /dev/null
+++ b/Documentation/git-tar-tree.txt
@@ -0,0 +1,39 @@
+git-tar-tree(1)
+===============
+v0.1, May 2005
+
+NAME
+----
+git-tar-tree - Creates a tar archive of the files in the named tree
+
+
+SYNOPSIS
+--------
+'git-tar-tree' <tree-ish> [ <base> ]
+
+DESCRIPTION
+-----------
+Creates a tar archive containing the tree structure for the named tree.
+When <base> is specified it is added as a leading path for the files in the
+generated tar archive.
+
+git-tar-tree behaves differently when given a tree ID versus when given
+a commit ID or tag ID. In the first case the current time is used as
+modification time of each file in the archive. In the latter case the
+commit time as recorded in the referenced commit object is used instead.
+Additionally the commit ID is stored in a global extended pax header.
+It can be extracted using git-get-tar-commit-id.
+
+
+Author
+------
+Written by Rene Scharfe.
+
+Documentation
+--------------
+Documentation by David Greaves, Junio C Hamano and the git-list <git@vger.kernel.org>.
+
+GIT
+---
+Part of the link:git.html[git] suite
+
diff --git a/Documentation/git-unpack-file.txt b/Documentation/git-unpack-file.txt
new file mode 100644
index 0000000..2f2130d
--- /dev/null
+++ b/Documentation/git-unpack-file.txt
@@ -0,0 +1,37 @@
+git-unpack-file(1)
+==================
+v0.1, May 2005
+
+NAME
+----
+git-unpack-file - Creates a temporary file with a blob's contents
+
+
+
+SYNOPSIS
+--------
+'git-unpack-file' <blob>
+
+DESCRIPTION
+-----------
+Creates a file holding the contents of the blob specified by sha1. It
+returns the name of the temporary file in the following format:
+ .merge_file_XXXXX
+
+OPTIONS
+-------
+<blob>::
+ Must be a blob id
+
+Author
+------
+Written by Linus Torvalds <torvalds@osdl.org>
+
+Documentation
+--------------
+Documentation by David Greaves, Junio C Hamano and the git-list <git@vger.kernel.org>.
+
+GIT
+---
+Part of the link:git.html[git] suite
+
diff --git a/Documentation/git-unpack-objects.txt b/Documentation/git-unpack-objects.txt
new file mode 100644
index 0000000..a12b843
--- /dev/null
+++ b/Documentation/git-unpack-objects.txt
@@ -0,0 +1,33 @@
+git-unpack-objects(1)
+=====================
+v0.1, July 2005
+
+NAME
+----
+git-unpack-objects - Unpack objects from a packed archive.
+
+
+SYNOPSIS
+--------
+'git-unpack-objects' < pack-file
+
+
+DESCRIPTION
+-----------
+Reads a packed archive (.pack) from the standard input, and
+expands the objects contained in the pack into "one-file
+one-object" format in $GIT_OBJECT_DIRECTORY.
+
+
+Author
+------
+Written by Linus Torvalds <torvalds@osdl.org>
+
+Documentation
+-------------
+Documentation by Junio C Hamano
+
+GIT
+---
+Part of the link:git.html[git] suite
+
diff --git a/Documentation/git-update-cache.txt b/Documentation/git-update-cache.txt
new file mode 100644
index 0000000..7340e89
--- /dev/null
+++ b/Documentation/git-update-cache.txt
@@ -0,0 +1,126 @@
+git-update-cache(1)
+===================
+v0.1, May 2005
+
+NAME
+----
+git-update-cache - Modifies the index or directory cache
+
+
+SYNOPSIS
+--------
+'git-update-cache'
+ [--add] [--remove] [--refresh] [--replace]
+ [--ignore-missing]
+ [--force-remove]
+ [--cacheinfo <mode> <object> <file>]\*
+ [--info-only]
+ [--] [<file>]\*
+
+DESCRIPTION
+-----------
+Modifies the index or directory cache. Each file mentioned is updated
+into the cache and any 'unmerged' or 'needs updating' state is
+cleared.
+
+The way "git-update-cache" handles files it is told about can be modified
+using the various options:
+
+OPTIONS
+-------
+--add::
+ If a specified file isn't in the cache already then it's
+ added.
+ Default behaviour is to ignore new files.
+
+--remove::
+ If a specified file is in the cache but is missing then it's
+ removed.
+ Default behaviour is to ignore removed files.
+
+--refresh::
+ Looks at the current cache and checks to see if merges or
+ updates are needed by checking stat() information.
+
+--ignore-missing::
+ Ignores missing files during a --refresh
+
+--cacheinfo <mode> <object> <path>::
+ Directly insert the specified info into the cache.
+
+--info-only::
+ Do not create objects in the object database for all
+ <file> arguments that follow this flag; just insert
+ their object IDs into the cache.
+
+--force-remove::
+ Remove the file from the index even when the working directory
+ still has such a file. (Implies --remove.)
+
+--replace::
+ By default, when a file `path` exists in the index,
+ git-update-cache refuses an attempt to add `path/file`.
+ Similarly if a file `path/file` exists, a file `path`
+ cannot be added. With --replace flag, existing entries
+ that conflict with the entry being added are
+ automatically removed with warning messages.
+
+--::
+ Do not interpret any more arguments as options.
+
+<file>::
+ Files to act on.
+ Note that files beginning with '.' are discarded. This includes
+ `./file` and `dir/./file`. If you don't want this, then use
+ cleaner names.
+ The same applies to directories ending '/' and paths with '//'
+
+Using --refresh
+---------------
+'--refresh' does not calculate a new sha1 file or bring the cache
+up-to-date for mode/content changes. But what it *does* do is to
+"re-match" the stat information of a file with the cache, so that you
+can refresh the cache for a file that hasn't been changed but where
+the stat entry is out of date.
+
+For example, you'd want to do this after doing a "git-read-tree", to link
+up the stat cache details with the proper files.
+
+Using --cacheinfo or --info-only
+--------------------------------
+'--cacheinfo' is used to register a file that is not in the
+current working directory. This is useful for minimum-checkout
+merging.
+
+ To pretend you have a file with mode and sha1 at path, say:
+
+ $ git-update-cache --cacheinfo mode sha1 path
+
+'--info-only' is used to register files without placing them in the object
+database. This is useful for status-only repositories.
+
+Both '--cacheinfo' and '--info-only' behave similarly: the index is updated
+but the object database isn't. '--cacheinfo' is useful when the object is
+in the database but the file isn't available locally. '--info-only' is
+useful when the file is available, but you do not wish to update the
+object database.
+
+Examples
+--------
+To update and refresh only the files already checked out:
+
+ git-checkout-cache -n -f -a && git-update-cache --ignore-missing --refresh
+
+
+Author
+------
+Written by Linus Torvalds <torvalds@osdl.org>
+
+Documentation
+--------------
+Documentation by David Greaves, Junio C Hamano and the git-list <git@vger.kernel.org>.
+
+GIT
+---
+Part of the link:git.html[git] suite
+
diff --git a/Documentation/git-update-server-info.txt b/Documentation/git-update-server-info.txt
new file mode 100644
index 0000000..4f65acb
--- /dev/null
+++ b/Documentation/git-update-server-info.txt
@@ -0,0 +1,42 @@
+git-update-server-info(1)
+=========================
+v0.1, July 2005
+
+NAME
+----
+git-update-server-info - Update auxiliary info file to help dumb servers
+
+
+SYNOPSIS
+--------
+'git-update-server-info' [--force]
+
+DESCRIPTION
+-----------
+A dumb server that does not do on-the-fly pack generations can
+have some auxiliary information files in $GIT_DIR/info and
+$GIT_OBJECT_DIRECTORY/info directories to help clients discover
+what references and packs the server has and make optimized
+pull decisions. This command generates such auxiliary files.
+
+
+OPTIONS
+-------
+
+--force::
+ Update the info files even when they do not appear
+ stale.
+
+
+Author
+------
+Written by Junio C Hamano <junkio@cox.net>
+
+Documentation
+--------------
+Documentation by Junio C Hamano.
+
+GIT
+---
+Part of the link:git.html[git] suite
+
diff --git a/Documentation/git-upload-pack.txt b/Documentation/git-upload-pack.txt
new file mode 100644
index 0000000..be597a1
--- /dev/null
+++ b/Documentation/git-upload-pack.txt
@@ -0,0 +1,40 @@
+git-upload-pack(1)
+==================
+v0.1, July 2005
+
+NAME
+----
+git-upload-pack - Send missing objects packed.
+
+
+SYNOPSIS
+--------
+'git-upload-pack' <directory>
+
+DESCRIPTION
+-----------
+Invoked by 'git-clone-pack' and/or 'git-fetch-pack', learns what
+objects the other side is missing, and sends them after packing.
+
+This command is usually not invoked directly by the end user.
+The UI for the protocol is on the 'git-fetch-pack' side, and the
+program pair is meant to be used to pull updates from a remote
+repository. For push operations, see 'git-send-pack'.
+
+
+OPTIONS
+-------
+<directory>::
+ The repository to sync from.
+
+Author
+------
+Written by Linus Torvalds <torvalds@osdl.org>
+
+Documentation
+--------------
+Documentation by Junio C Hamano.
+
+GIT
+---
+Part of the link:git.html[git] suite
diff --git a/Documentation/git-var.txt b/Documentation/git-var.txt
new file mode 100644
index 0000000..45b1006
--- /dev/null
+++ b/Documentation/git-var.txt
@@ -0,0 +1,60 @@
+git-var(1)
+==========
+v0.1, July 2005
+
+NAME
+----
+git-var - Print the git user's identity
+
+
+SYNOPSIS
+--------
+git-var [ -l | <variable> ]
+
+DESCRIPTION
+-----------
+Prints a git logical variable.
+
+-l causes the logical variables to be listed.
+
+EXAMPLE
+--------
+$git-var GIT_AUTHOR_IDENT
+
+Eric W. Biederman <ebiederm@lnxi.com> 1121223278 -0600
+
+
+VARIABLES
+----------
+GIT_AUTHOR_IDENT
+ The author of a piece of code.
+
+GIT_COMMITTER_IDENT
+ The person who put a piece of code into git.
+
+Diagnostics
+-----------
+You don't exist. Go away!::
+ The passwd(5) gecos field couldn't be read
+Your parents must have hated you!::
+ The passwd(5) gecos field is longer than a giant static buffer.
+Your sysadmin must hate you!::
+ The passwd(5) name field is longer than a giant static buffer.
+
+See Also
+--------
+link:git-commit-tree.html[git-commit-tree]
+link:git-tag-script.html[git-tag-script]
+
+Author
+------
+Written by Eric Biederman <ebiederm@xmission.com>
+
+Documentation
+--------------
+Documentation by Eric Biederman and the git-list <git@vger.kernel.org>.
+
+GIT
+---
+Part of the link:git.html[git] suite
+
diff --git a/Documentation/git-verify-pack.txt b/Documentation/git-verify-pack.txt
new file mode 100644
index 0000000..d1043eb
--- /dev/null
+++ b/Documentation/git-verify-pack.txt
@@ -0,0 +1,49 @@
+git-verify-pack(1)
+==================
+v0.1, June 2005
+
+NAME
+----
+git-verify-pack - Validate packed GIT archive files.
+
+
+SYNOPSIS
+--------
+'git-verify-pack' [-v] <pack>.idx ...
+
+
+DESCRIPTION
+-----------
+Reads given idx file for packed GIT archive created with
+git-pack-objects command and verifies idx file and the
+corresponding pack file.
+
+OPTIONS
+-------
+<pack>.idx ...::
+ The idx files to verify.
+
+-v::
+ After verifying the pack, show list of objects contained
+ in the pack. The format used is:
+
+ SHA1 type size offset-in-packfile
+
+ for objects that are not deltified in the pack, and
+
+ SHA1 type size offset-in-packfile depth base-SHA1
+
+ for objects that are deltified.
+
+Author
+------
+Written by Junio C Hamano <junkio@cox.net>
+
+Documentation
+--------------
+Documentation by Junio C Hamano
+
+GIT
+---
+Part of the link:git.html[git] suite
+
diff --git a/Documentation/git-write-tree.txt b/Documentation/git-write-tree.txt
new file mode 100644
index 0000000..7f076b4
--- /dev/null
+++ b/Documentation/git-write-tree.txt
@@ -0,0 +1,44 @@
+git-write-tree(1)
+=================
+v0.1, May 2005
+
+NAME
+----
+git-write-tree - Creates a tree from the current cache
+
+
+SYNOPSIS
+--------
+'git-write-tree'
+ [--missing-ok]
+
+DESCRIPTION
+-----------
+Creates a tree object using the current cache.
+
+The cache must be merged.
+
+Conceptually, "git-write-tree" sync()s the current directory cache contents
+into a set of tree files.
+In order to have that match what is actually in your directory right
+now, you need to have done a "git-update-cache" phase before you did the
+"git-write-tree".
+
+OPTIONS
+-------
+--missing-ok::
+ Normally "git-write-tree" ensures that the objects referenced by the
+ directory exist in the object database. This option disables this check.
+
+Author
+------
+Written by Linus Torvalds <torvalds@osdl.org>
+
+Documentation
+--------------
+Documentation by David Greaves, Junio C Hamano and the git-list <git@vger.kernel.org>.
+
+GIT
+---
+Part of the link:git.html[git] suite
+
diff --git a/Documentation/git.txt b/Documentation/git.txt
new file mode 100644
index 0000000..4c01b2e
--- /dev/null
+++ b/Documentation/git.txt
@@ -0,0 +1,351 @@
+git(7)
+======
+May 2005
+
+NAME
+----
+git - the stupid content tracker
+
+
+SYNOPSIS
+--------
+'git-<command>' <args>
+
+DESCRIPTION
+-----------
+
+This is reference information for the core git commands.
+
+The Discussion section below contains much useful definition and
+clarification info - read that first. And of the commands, I suggest
+reading link:git-update-cache.html[git-update-cache] and
+link:git-read-tree.html[git-read-tree] first - I wish I had!
+
+David Greaves <david@dgreaves.com>
+08/05/05
+
+Updated by Junio C Hamano <junkio@cox.net> on 2005-05-05 to
+reflect recent changes.
+
+Commands Overview
+-----------------
+The git commands can helpfully be split into those that manipulate
+the repository, the cache and the working fileset, those that
+interrogate and compare them, and those that move objects and
+references between repositories.
+
+There are also some ancillary programs that can be viewed as useful
+aids for using the core commands but which are unlikely to be used by
+SCMs layered over git.
+
+Manipulation commands
+~~~~~~~~~~~~~~~~~~~~~
+link:git-checkout-cache.html[git-checkout-cache]::
+ Copy files from the cache to the working directory
+
+link:git-commit-tree.html[git-commit-tree]::
+ Creates a new commit object
+
+link:git-init-db.html[git-init-db]::
+ Creates an empty git object database
+
+link:git-merge-base.html[git-merge-base]::
+ Finds as good a common ancestor as possible for a merge
+
+link:git-mktag.html[git-mktag]::
+ Creates a tag object
+
+link:git-read-tree.html[git-read-tree]::
+ Reads tree information into the directory cache
+
+link:git-update-cache.html[git-update-cache]::
+ Modifies the index or directory cache
+
+link:git-hash-object.html[git-hash-object]::
+ Computes the object ID from a file.
+
+link:git-write-tree.html[git-write-tree]::
+ Creates a tree from the current cache
+
+Interrogation commands
+~~~~~~~~~~~~~~~~~~~~~~
+link:git-cat-file.html[git-cat-file]::
+ Provide content or type information for repository objects
+
+link:git-check-files.html[git-check-files]::
+ Verify a list of files are up-to-date
+
+link:git-diff-cache.html[git-diff-cache]::
+ Compares content and mode of blobs between the cache and repository
+
+link:git-diff-files.html[git-diff-files]::
+ Compares files in the working tree and the cache
+
+link:git-diff-tree.html[git-diff-tree]::
+ Compares the content and mode of blobs found via two tree objects
+
+link:git-export.html[git-export]::
+ Exports each commit and a diff against each of its parents
+
+link:git-fsck-cache.html[git-fsck-cache]::
+ Verifies the connectivity and validity of the objects in the database
+
+link:git-ls-files.html[git-ls-files]::
+ Information about files in the cache/working directory
+
+link:git-ls-tree.html[git-ls-tree]::
+ Displays a tree object in human readable form
+
+link:git-merge-cache.html[git-merge-cache]::
+ Runs a merge for files needing merging
+
+link:git-rev-list.html[git-rev-list]::
+ Lists commit objects in reverse chronological order
+
+link:git-rev-tree.html[git-rev-tree]::
+ Provides the revision tree for one or more commits
+
+link:git-tar-tree.html[git-tar-tree]::
+ Creates a tar archive of the files in the named tree
+
+link:git-unpack-file.html[git-unpack-file]::
+ Creates a temporary file with a blob's contents
+
+link:git-var.html[git-var]::
+ Displays a git logical variable
+
+link:git-verify-pack.html[git-verify-pack]::
+ Validates packed GIT archive files
+
+The interrogate commands may create files - and you can force them to
+touch the working file set - but in general they don't
+
+
+Synching repositories
+~~~~~~~~~~~~~~~~~~~~~
+
+link:git-clone-script.html[git-clone-script]::
+ Clones a repository into the current repository (user interface)
+
+link:git-clone-pack.html[git-clone-pack]::
+ Clones a repository into the current repository (engine
+ for ssh and local transport)
+
+link:git-fetch-script.html[git-fetch-script]::
+ Download from a remote repository via various protocols
+ (user interface).
+
+link:git-pull-script.html[git-pull-script]::
+ Fetch from and merge with a remote repository via
+ various protocols (user interface).
+
+link:git-http-pull.html[git-http-pull]::
+ Downloads a remote GIT repository via HTTP
+
+link:git-local-pull.html[git-local-pull]::
+ Duplicates another GIT repository on a local system
+
+link:git-ssh-pull.html[git-ssh-pull]::
+ Pulls from a remote repository over ssh connection
+
+link:git-send-pack.html[git-send-pack]::
+ Pushes to a remote repository, intelligently.
+
+link:git-receive-pack.html[git-receive-pack]::
+ Invoked by 'git-send-pack' to receive what is pushed to it.
+
+link:git-clone-pack.html[git-clone-pack]::
+ Clones from a remote repository.
+
+link:git-fetch-pack.html[git-fetch-pack]::
+ Updates from a remote repository.
+
+link:git-peek-remote.html[git-peek-remote]::
+ Lists references on a remote repository using upload-pack protocol.
+
+link:git-upload-pack.html[git-upload-pack]::
+ Invoked by 'git-clone-pack' and 'git-fetch-pack' to push
+ what is asked for.
+
+link:git-update-server-info.html[git-update-server-info]::
+ Updates auxiliary information on a dumb server to help
+ clients discover references and packs on it.
+
+
+Ancillary Commands
+------------------
+Manipulators:
+
+link:git-apply-patch-script.html[git-apply-patch-script]::
+ Sample script to apply the diffs from git-diff-*
+
+link:git-convert-cache.html[git-convert-cache]::
+ Converts old-style GIT repository
+
+link:git-merge-one-file-script.html[git-merge-one-file-script]::
+ The standard helper program to use with "git-merge-cache"
+
+link:git-prune-script.html[git-prune-script]::
+ Prunes all unreachable objects from the object database
+
+link:git-resolve-script.html[git-resolve-script]::
+ Script used to merge two trees
+
+link:git-tag-script.html[git-tag-script]::
+ An example script to create a tag object signed with GPG
+
+
+Interrogators:
+
+link:git-diff-helper.html[git-diff-helper]::
+ Generates patch format output for git-diff-*
+
+link:git-ssh-push.html[git-ssh-push]::
+ Helper "server-side" program used by git-ssh-pull
+
+
+
+Identifier Terminology
+----------------------
+<object>::
+ Indicates the sha1 identifier for any type of object
+
+<blob>::
+ Indicates a blob object sha1 identifier
+
+<tree>::
+ Indicates a tree object sha1 identifier
+
+<commit>::
+ Indicates a commit object sha1 identifier
+
+<tree-ish>::
+ Indicates a tree, commit or tag object sha1 identifier. A
+ command that takes a <tree-ish> argument ultimately wants to
+ operate on a <tree> object but automatically dereferences
+ <commit> and <tag> objects that point at a <tree>.
+
+<type>::
+ Indicates that an object type is required.
+ Currently one of: blob/tree/commit/tag
+
+<file>::
+ Indicates a filename - always relative to the root of
+ the tree structure GIT_INDEX_FILE describes.
+
+Symbolic Identifiers
+--------------------
+Any git command accepting any <object> can also use the following
+symbolic notation:
+
+HEAD::
+ indicates the head of the repository (ie the contents of
+ `$GIT_DIR/HEAD`)
+<tag>::
+ a valid tag 'name'+
+ (ie the contents of `$GIT_DIR/refs/tags/<tag>`)
+<head>::
+ a valid head 'name'+
+ (ie the contents of `$GIT_DIR/refs/heads/<head>`)
+<snap>::
+ a valid snapshot 'name'+
+ (ie the contents of `$GIT_DIR/refs/snap/<snap>`)
+
+
+File/Directory Structure
+------------------------
+The git-core manipulates the following areas in the directory:
+
+ .git/ The base (overridden with $GIT_DIR)
+ objects/ The object base (overridden with $GIT_OBJECT_DIRECTORY)
+ ??/ 'First 2 chars of object' directories.
+ pack/ Packed archives.
+
+ refs/ Directories containing symbolic names for objects
+ (each file contains the hex SHA1 + newline)
+ heads/ Commits which are heads of various sorts
+ tags/ Tags, by the tag name (or some local renaming of it)
+ */ Any other subdirectory of refs/ can be used to store
+ files similar to what are under refs/heads/.
+ HEAD Symlink to refs/heads/<current-branch-name>
+
+Higher level SCMs may provide and manage additional information in the
+GIT_DIR.
+
+Terminology
+-----------
+Each line contains terms which you may see used interchangeably
+
+ object database, .git directory
+ directory cache, index
+ id, sha1, sha1-id, sha1 hash
+ type, tag
+
+
+Environment Variables
+---------------------
+Various git commands use the following environment variables:
+
+The git Repository
+~~~~~~~~~~~~~~~~~~
+These environment variables apply to 'all' core git commands. Nb: it
+is worth noting that they may be used/overridden by SCMs sitting above
+git, so take care if using Cogito etc.
+
+'GIT_INDEX_FILE'::
+ This environment variable allows the specification of an alternate
+ cache/index file. If not specified, the default of
+ `$GIT_DIR/index` is used.
+
+'GIT_OBJECT_DIRECTORY'::
+ If the object storage directory is specified via this
+ environment variable then the sha1 directories are created
+ underneath - otherwise the default `$GIT_DIR/objects`
+ directory is used.
+
+'GIT_ALTERNATE_OBJECT_DIRECTORIES'::
+ Due to the immutable nature of git objects, old objects can be
+ archived into shared, read-only directories. This variable
+ specifies a ":" separated list of git object directories which
+ can be used to search for git objects. New objects will not be
+ written to these directories.
+
+'GIT_DIR'::
+ If the 'GIT_DIR' environment variable is set then it specifies
+ a path to use instead of `./.git` for the base of the
+ repository.
+
+git Commits
+~~~~~~~~~~~
+'GIT_AUTHOR_NAME'::
+'GIT_AUTHOR_EMAIL'::
+'GIT_AUTHOR_DATE'::
+'GIT_COMMITTER_NAME'::
+'GIT_COMMITTER_EMAIL'::
+ see link:git-commit-tree.html[git-commit-tree]
+
+git Diffs
+~~~~~~~~~
+'GIT_DIFF_OPTS'::
+'GIT_EXTERNAL_DIFF'::
+ see the "generating patches" section in :
+ link:git-diff-cache.html[git-diff-cache];
+ link:git-diff-files.html[git-diff-files];
+ link:git-diff-tree.html[git-diff-tree]
+
+Discussion
+----------
+include::../README[]
+
+Author
+------
+Written by Linus Torvalds <torvalds@osdl.org> and the git-list <git@vger.kernel.org>.
+
+Documentation
+--------------
+Documentation by David Greaves, Junio C Hamano and the git-list <git@vger.kernel.org>.
+
+GIT
+---
+Part of the link:git.html[git] suite
+
diff --git a/Documentation/pack-protocol.txt b/Documentation/pack-protocol.txt
new file mode 100644
index 0000000..7d6aec4
--- /dev/null
+++ b/Documentation/pack-protocol.txt
@@ -0,0 +1,38 @@
+There are two Pack push-pull protocols.
+
+upload-pack (S) | fetch/clone-pack (C) protocol:
+
+ # Tell the puller what commits we have and what their names are
+ S: SHA1 name
+ S: ...
+ S: SHA1 name
+ S: # flush -- it's your turn
+ # Tell the pusher what commits we want, and what we have
+ C: want name
+ C: ..
+ C: want name
+ C: have SHA1
+ C: have SHA1
+ C: ...
+ C: # flush -- occasionally ask "had enough?"
+ S: NAK
+ C: have SHA1
+ C: ...
+ C: have SHA1
+ S: ACK
+ C: done
+ S: XXXXXXX -- packfile contents.
+
+send-pack | receive-pack protocol.
+
+ # Tell the pusher what commits we have and what their names are
+ C: SHA1 name
+ C: ...
+ C: SHA1 name
+ C: # flush -- it's your turn
+ # Tell the puller what the pusher has
+ S: old-SHA1 new-SHA1 name
+ S: old-SHA1 new-SHA1 name
+ S: ...
+ S: # flush -- done with the list
+ S: XXXXXXX --- packfile contents.
diff --git a/Documentation/pull-fetch-param.txt b/Documentation/pull-fetch-param.txt
new file mode 100644
index 0000000..7ae4ba0
--- /dev/null
+++ b/Documentation/pull-fetch-param.txt
@@ -0,0 +1,36 @@
+<repository>::
+ The "remote" repository to pull from. One of the
+ following notations can be used to name the repository
+ to pull from:
+
+ Rsync URL
+ rsync://remote.machine/path/to/repo.git/
+
+ HTTP(s) URL
+ http://remote.machine/path/to/repo.git/
+
+ GIT URL
+ git://remote.machine/path/to/repo.git/
+ remote.machine:/path/to/repo.git/
+
+ Local directory
+ /path/to/repo.git/
+
+ In addition to that, as a short-hand, the name of a file
+ in $GIT_DIR/branches directory can be specified; the
+ named file should contain a single line, a URL in one of
+ the above formats, optionally followed by a hash '#' and
+ the name of remote head.
+
+<head>::
+ The remote head name to fetch from. That is, make the
+ objects reachable from the commit recorded in
+ $GIT_DIR/refs/heads/<head> in the remote repository
+ available locally.
+
+tag <tag>::
+ The remote head tag to fetch from. That is, make the
+ objects reachable from the commit recorded in
+ $GIT_DIR/refs/tags/<tag> in the remote repository
+ available locally.
+
diff --git a/Documentation/tutorial.txt b/Documentation/tutorial.txt
new file mode 100644
index 0000000..74e2e0a
--- /dev/null
+++ b/Documentation/tutorial.txt
@@ -0,0 +1,1111 @@
+A short git tutorial
+====================
+May 2005
+
+
+Introduction
+------------
+
+This is trying to be a short tutorial on setting up and using a git
+archive, mainly because being hands-on and using explicit examples is
+often the best way of explaining what is going on.
+
+In normal life, most people wouldn't use the "core" git programs
+directly, but rather script around them to make them more palatable.
+Understanding the core git stuff may help some people get those scripts
+done, though, and it may also be instructive in helping people
+understand what it is that the higher-level helper scripts are actually
+doing.
+
+The core git is often called "plumbing", with the prettier user
+interfaces on top of it called "porcelain". You may not want to use the
+plumbing directly very often, but it can be good to know what the
+plumbing does for when the porcelain isn't flushing...
+
+
+Creating a git archive
+----------------------
+
+Creating a new git archive couldn't be easier: all git archives start
+out empty, and the only thing you need to do is find yourself a
+subdirectory that you want to use as a working tree - either an empty
+one for a totally new project, or an existing working tree that you want
+to import into git.
+
+For our first example, we're going to start a totally new archive from
+scratch, with no pre-existing files, and we'll call it "git-tutorial".
+To start up, create a subdirectory for it, change into that
+subdirectory, and initialize the git infrastructure with "git-init-db":
+
+ mkdir git-tutorial
+ cd git-tutorial
+ git-init-db
+
+to which git will reply
+
+ defaulting to local storage area
+
+which is just git's way of saying that you haven't been doing anything
+strange, and that it will have created a local .git directory setup for
+your new project. You will now have a ".git" directory, and you can
+inspect that with "ls". For your new empty project, ls should show you
+three entries:
+
+ - a symlink called HEAD, pointing to "refs/heads/master"
+
+ Don't worry about the fact that the file that the HEAD link points to
+ doesn't even exist yet - you haven't created the commit that will
+ start your HEAD development branch yet.
+
+ - a subdirectory called "objects", which will contain all the git SHA1
+ objects of your project. You should never have any real reason to
+ look at the objects directly, but you might want to know that these
+ objects are what contains all the real _data_ in your repository.
+
+ - a subdirectory called "refs", which contains references to objects.
+
+ In particular, the "refs" subdirectory will contain two other
+ subdirectories, named "heads" and "tags" respectively. They do
+ exactly what their names imply: they contain references to any number
+ of different "heads" of development (aka "branches"), and to any
+ "tags" that you have created to name specific versions of your
+ repository.
+
+ One note: the special "master" head is the default branch, which is
+ why the .git/HEAD file was created as a symlink to it even if it
+ doesn't yet exist. Basically, the HEAD link is supposed to always
+ point to the branch you are working on right now, and you always
+ start out expecting to work on the "master" branch.
+
+ However, this is only a convention, and you can name your branches
+ anything you want, and don't have to ever even _have_ a "master"
+ branch. A number of the git tools will assume that .git/HEAD is
+ valid, though.
+
+ [ Implementation note: an "object" is identified by its 160-bit SHA1
+ hash, aka "name", and a reference to an object is always the 40-byte
+ hex representation of that SHA1 name. The files in the "refs"
+ subdirectory are expected to contain these hex references (usually
+ with a final '\n' at the end), and you should thus expect to see a
+ number of 41-byte files containing these references in these refs
+ subdirectories when you actually start populating your tree ]
+
+You have now created your first git archive. Of course, since it's
+empty, that's not very useful, so let's start populating it with data.
+
+
+ Populating a git archive
+ ------------------------
+
+We'll keep this simple and stupid, so we'll start off with populating a
+few trivial files just to get a feel for it.
+
+Start off with just creating any random files that you want to maintain
+in your git archive. We'll start off with a few bad examples, just to
+get a feel for how this works:
+
+ echo "Hello World" > a
+ echo "Silly example" > b
+
+you have now created two files in your working directory, but to
+actually check in your hard work, you will have to go through two steps:
+
+ - fill in the "cache" aka "index" file with the information about your
+ working directory state
+
+ - commit that index file as an object.
+
+The first step is trivial: when you want to tell git about any changes
+to your working directory, you use the "git-update-cache" program. That
+program normally just takes a list of filenames you want to update, but
+to avoid trivial mistakes, it refuses to add new entries to the cache
+(or remove existing ones) unless you explicitly tell it that you're
+adding a new entry with the "--add" flag (or removing an entry with the
+"--remove" flag).
+
+So to populate the index with the two files you just created, you can do
+
+ git-update-cache --add a b
+
+and you have now told git to track those two files.
+
+In fact, as you did that, if you now look into your object directory,
+you'll notice that git will have added two new objects to the object
+store. If you did exactly the steps above, you should now be able to do
+
+ ls .git/objects/??/*
+
+and see two files:
+
+ .git/objects/55/7db03de997c86a4a028e1ebd3a1ceb225be238
+ .git/objects/f2/4c74a2e500f5ee1332c86b94199f52b1d1d962
+
+which correspond to the objects with SHA1 names 557db... and f24c7...
+respectively.
+
+If you want to, you can use "git-cat-file" to look at those objects, but
+you'll have to use the object name, not the filename of the object:
+
+ git-cat-file -t 557db03de997c86a4a028e1ebd3a1ceb225be238
+
+where the "-t" tells git-cat-file to tell you what the "type" of the
+object is. Git will tell you that you have a "blob" object (ie just a
+regular file), and you can see the contents with
+
+ git-cat-file "blob" 557db03de997c86a4a028e1ebd3a1ceb225be238
+
+which will print out "Hello World". The object 557db... is nothing
+more than the contents of your file "a".
+
+[ Digression: don't confuse that object with the file "a" itself. The
+ object is literally just those specific _contents_ of the file, and
+ however much you later change the contents in file "a", the object we
+ just looked at will never change. Objects are immutable. ]
+
+Anyway, as we mentioned previously, you normally never actually take a
+look at the objects themselves, and typing long 40-character hex SHA1
+names is not something you'd normally want to do. The above digression
+was just to show that "git-update-cache" did something magical, and
+actually saved away the contents of your files into the git content
+store.
+
+Updating the cache did something else too: it created a ".git/index"
+file. This is the index that describes your current working tree, and
+something you should be very aware of. Again, you normally never worry
+about the index file itself, but you should be aware of the fact that
+you have not actually really "checked in" your files into git so far,
+you've only _told_ git about them.
+
+However, since git knows about them, you can now start using some of the
+most basic git commands to manipulate the files or look at their status.
+
+In particular, let's not even check in the two files into git yet, we'll
+start off by adding another line to "a" first:
+
+ echo "It's a new day for git" >> a
+
+and you can now, since you told git about the previous state of "a", ask
+git what has changed in the tree compared to your old index, using the
+"git-diff-files" command:
+
+ git-diff-files
+
+oops. That wasn't very readable. It just spit out its own internal
+version of a "diff", but that internal version really just tells you
+that it has noticed that "a" has been modified, and that the old object
+contents it had have been replaced with something else.
+
+To make it readable, we can tell git-diff-files to output the
+differences as a patch, using the "-p" flag:
+
+ git-diff-files -p
+
+which will spit out
+
+ diff --git a/a b/a
+ --- a/a
+ +++ b/a
+ @@ -1 +1,2 @@
+ Hello World
+ +It's a new day for git
+
+ie the diff of the change we caused by adding another line to "a".
+
+In other words, git-diff-files always shows us the difference between
+what is recorded in the index, and what is currently in the working
+tree. That's very useful.
+
+A common shorthand for "git-diff-files -p" is to just write
+
+ git diff
+
+which will do the same thing.
+
+
+ Committing git state
+ --------------------
+
+Now, we want to go to the next stage in git, which is to take the files
+that git knows about in the index, and commit them as a real tree. We do
+that in two phases: creating a "tree" object, and committing that "tree"
+object as a "commit" object together with an explanation of what the
+tree was all about, along with information of how we came to that state.
+
+Creating a tree object is trivial, and is done with "git-write-tree".
+There are no options or other input: git-write-tree will take the
+current index state, and write an object that describes that whole
+index. In other words, we're now tying together all the different
+filenames with their contents (and their permissions), and we're
+creating the equivalent of a git "directory" object:
+
+ git-write-tree
+
+and this will just output the name of the resulting tree, in this case
+(if you have done exactly as I've described) it should be
+
+ 3ede4ed7e895432c0a247f09d71a76db53bd0fa4
+
+which is another incomprehensible object name. Again, if you want to,
+you can use "git-cat-file -t 3ede4.." to see that this time the object
+is not a "blob" object, but a "tree" object (you can also use
+git-cat-file to actually output the raw object contents, but you'll see
+mainly a binary mess, so that's less interesting).
+
+However - normally you'd never use "git-write-tree" on its own, because
+normally you always commit a tree into a commit object using the
+"git-commit-tree" command. In fact, it's easier to not actually use
+git-write-tree on its own at all, but to just pass its result in as an
+argument to "git-commit-tree".
+
+"git-commit-tree" normally takes several arguments - it wants to know
+what the _parent_ of a commit was, but since this is the first commit
+ever in this new archive, and it has no parents, we only need to pass in
+the tree ID. However, git-commit-tree also wants to get a commit message
+on its standard input, and it will write out the resulting ID for the
+commit to its standard output.
+
+And this is where we start using the .git/HEAD file. The HEAD file is
+supposed to contain the reference to the top-of-tree, and since that's
+exactly what git-commit-tree spits out, we can do this all with a simple
+shell pipeline:
+
+ echo "Initial commit" | git-commit-tree $(git-write-tree) > .git/HEAD
+
+which will say:
+
+ Committing initial tree 3ede4ed7e895432c0a247f09d71a76db53bd0fa4
+
+just to warn you about the fact that it created a totally new commit
+that is not related to anything else. Normally you do this only _once_
+for a project ever, and all later commits will be parented on top of an
+earlier commit, and you'll never see this "Committing initial tree"
+message ever again.
+
+Again, normally you'd never actually do this by hand. There is a
+helpful script called "git commit" that will do all of this for you. So
+you could have just written
+
+ git commit
+
+instead, and it would have done the above magic scripting for you.
+
+
+ Making a change
+ ---------------
+
+Remember how we did the "git-update-cache" on file "a" and then we
+changed "a" afterward, and could compare the new state of "a" with the
+state we saved in the index file?
+
+Further, remember how I said that "git-write-tree" writes the contents
+of the _index_ file to the tree, and thus what we just committed was in
+fact the _original_ contents of the file "a", not the new ones. We did
+that on purpose, to show the difference between the index state, and the
+state in the working directory, and how they don't have to match, even
+when we commit things.
+
+As before, if we do "git-diff-files -p" in our git-tutorial project,
+we'll still see the same difference we saw last time: the index file
+hasn't changed by the act of committing anything. However, now that we
+have committed something, we can also learn to use a new command:
+"git-diff-cache".
+
+Unlike "git-diff-files", which showed the difference between the index
+file and the working directory, "git-diff-cache" shows the differences
+between a committed _tree_ and either the index file or the working
+directory. In other words, git-diff-cache wants a tree to be diffed
+against, and before we did the commit, we couldn't do that, because we
+didn't have anything to diff against.
+
+But now we can do
+
+ git-diff-cache -p HEAD
+
+(where "-p" has the same meaning as it did in git-diff-files), and it
+will show us the same difference, but for a totally different reason.
+Now we're comparing the working directory not against the index file,
+but against the tree we just wrote. It just so happens that those two
+are obviously the same, so we get the same result.
+
+Again, because this is a common operation, you can also just shorthand
+it with
+
+ git diff HEAD
+
+which ends up doing the above for you.
+
+In other words, "git-diff-cache" normally compares a tree against the
+working directory, but when given the "--cached" flag, it is told to
+instead compare against just the index cache contents, and ignore the
+current working directory state entirely. Since we just wrote the index
+file to HEAD, doing "git-diff-cache --cached -p HEAD" should thus return
+an empty set of differences, and that's exactly what it does.
+
+[ Digression: "git-diff-cache" really always uses the index for its
+ comparisons, and saying that it compares a tree against the working
+ directory is thus not strictly accurate. In particular, the list of
+ files to compare (the "meta-data") _always_ comes from the index file,
+ regardless of whether the --cached flag is used or not. The --cached
+ flag really only determines whether the file _contents_ to be compared
+ come from the working directory or not.
+
+ This is not hard to understand, as soon as you realize that git simply
+ never knows (or cares) about files that it is not told about
+ explicitly. Git will never go _looking_ for files to compare, it
+ expects you to tell it what the files are, and that's what the index
+ is there for. ]
+
+However, our next step is to commit the _change_ we did, and again, to
+understand what's going on, keep in mind the difference between "working
+directory contents", "index file" and "committed tree". We have changes
+in the working directory that we want to commit, and we always have to
+work through the index file, so the first thing we need to do is to
+update the index cache:
+
+ git-update-cache a
+
+(note how we didn't need the "--add" flag this time, since git knew
+about the file already).
+
+Note what happens to the different git-diff-xxx versions here. After
+we've updated "a" in the index, "git-diff-files -p" now shows no
+differences, but "git-diff-cache -p HEAD" still _does_ show that the
+current state is different from the state we committed. In fact, now
+"git-diff-cache" shows the same difference whether we use the "--cached"
+flag or not, since now the index is coherent with the working directory.
+
+Now, since we've updated "a" in the index, we can commit the new
+version. We could do it by writing the tree by hand again, and
+committing the tree (this time we'd have to use the "-p HEAD" flag to
+tell commit that the HEAD was the _parent_ of the new commit, and that
+this wasn't an initial commit any more), but you've done that once
+already, so let's just use the helpful script this time:
+
+ git commit
+
+which starts an editor for you to write the commit message and tells you
+a bit about what you're doing.
+
+Write whatever message you want, and all the lines that start with '#'
+will be pruned out, and the rest will be used as the commit message for
+the change. If you decide you don't want to commit anything after all at
+this point (you can continue to edit things and update the cache), you
+can just leave an empty message. Otherwise git-commit-script will commit
+the change for you.
+
+You've now made your first real git commit. And if you're interested in
+looking at what git-commit-script really does, feel free to investigate:
+it's a few very simple shell scripts to generate the helpful (?) commit
+message headers, and a few one-liners that actually do the commit itself.
+
+
+ Checking it out
+ ---------------
+
+While creating changes is useful, it's even more useful if you can tell
+later what changed. The most useful command for this is another of the
+"diff" family, namely "git-diff-tree".
+
+git-diff-tree can be given two arbitrary trees, and it will tell you the
+differences between them. Perhaps even more commonly, though, you can
+give it just a single commit object, and it will figure out the parent
+of that commit itself, and show the difference directly. Thus, to get
+the same diff that we've already seen several times, we can now do
+
+ git-diff-tree -p HEAD
+
+(again, "-p" means to show the difference as a human-readable patch),
+and it will show what the last commit (in HEAD) actually changed.
+
+More interestingly, you can also give git-diff-tree the "-v" flag, which
+tells it to also show the commit message and author and date of the
+commit, and you can tell it to show a whole series of diffs.
+Alternatively, you can tell it to be "silent", and not show the diffs at
+all, but just show the actual commit message.
+
+In fact, together with the "git-rev-list" program (which generates a
+list of revisions), git-diff-tree ends up being a veritable fount of
+changes. A trivial (but very useful) script called "git-whatchanged" is
+included with git which does exactly this, and shows a log of recent
+activity.
+
+To see the whole history of our pitiful little git-tutorial project, you
+can do
+
+ git log
+
+which shows just the log messages, or if we want to see the log together
+with the associated patches use the more complex (and much more
+powerful)
+
+ git-whatchanged -p --root
+
+and you will see exactly what has changed in the repository over its
+short history.
+
+[ Side note: the "--root" flag is a flag to git-diff-tree to tell it to
+ show the initial aka "root" commit too. Normally you'd probably not
+ want to see the initial import diff, but since the tutorial project
+ was started from scratch and is so small, we use it to make the result
+ a bit more interesting ]
+
+With that, you should now be having some inkling of what git does, and
+can explore on your own.
+
+
+[ Side note: most likely, you are not directly using the core
+ git Plumbing commands, but using Porcelain like Cogito on top
+ of it. Cogito works a bit differently and you usually do not
+ have to run "git-update-cache" yourself for changed files (you
+ do tell underlying git about additions and removals via
+ "cg-add" and "cg-rm" commands). Just before you make a commit
+ with "cg-commit", Cogito figures out which files you modified,
+ and runs "git-update-cache" on them for you. ]
+
+
+ Tagging a version
+ -----------------
+
+In git, there are two kinds of tags, a "light" one, and a "signed tag".
+
+A "light" tag is technically nothing more than a branch, except we put
+it in the ".git/refs/tags/" subdirectory instead of calling it a "head".
+So the simplest form of tag involves nothing more than
+
+ git tag my-first-tag
+
+which just writes the current HEAD into the .git/refs/tags/my-first-tag
+file, after which point you can then use this symbolic name for that
+particular state. You can, for example, do
+
+ git diff my-first-tag
+
+to diff your current state against that tag (which at this point will
+obviously be an empty diff, but if you continue to develop and commit
+stuff, you can use your tag as an "anchor-point" to see what has changed
+since you tagged it).
+
+A "signed tag" is actually a real git object, and contains not only a
+pointer to the state you want to tag, but also a small tag name and
+message, along with a PGP signature that says that yes, you really did
+that tag. You create these signed tags with the "-s" flag to "git tag":
+
+ git tag -s <tagname>
+
+which will sign the current HEAD (but you can also give it another
+argument that specifies the thing to tag, ie you could have tagged the
+current "mybranch" point by using "git tag <tagname> mybranch").
+
+You normally only do signed tags for major releases or things
+like that, while the light-weight tags are useful for any marking you
+want to do - any time you decide that you want to remember a certain
+point, just create a private tag for it, and you have a nice symbolic
+name for the state at that point.
+
+
+ Copying archives
+ -----------------
+
+Git archives are normally totally self-sufficient, and it's worth noting
+that unlike CVS, for example, there is no separate notion of
+"repository" and "working tree". A git repository normally _is_ the
+working tree, with the local git information hidden in the ".git"
+subdirectory. There is nothing else. What you see is what you got.
+
+[ Side note: you can tell git to split the git internal information from
+ the directory that it tracks, but we'll ignore that for now: it's not
+ how normal projects work, and it's really only meant for special uses.
+ So the mental model of "the git information is always tied directly to
+ the working directory that it describes" may not be technically 100%
+ accurate, but it's a good model for all normal use ]
+
+This has two implications:
+
+ - if you grow bored with the tutorial archive you created (or you've
+ made a mistake and want to start all over), you can just do simple
+
+ rm -rf git-tutorial
+
+ and it will be gone. There's no external repository, and there's no
+ history outside of the project you created.
+
+ - if you want to move or duplicate a git archive, you can do so. There
+ is a "git clone" command, but if all you want to do is just to
+ create a copy of your archive (with all the full history that
+ went along with it), you can do so with a regular
+ "cp -a git-tutorial new-git-tutorial".
+
+ Note that when you've moved or copied a git archive, your git index
+ file (which caches various information, notably some of the "stat"
+ information for the files involved) will likely need to be refreshed.
+ So after you do a "cp -a" to create a new copy, you'll want to do
+
+ git-update-cache --refresh
+
+ to make sure that the index file is up-to-date in the new one.
+
+Note that the second point is true even across machines. You can
+duplicate a remote git archive with _any_ regular copy mechanism, be it
+"scp", "rsync" or "wget".
+
+When copying a remote repository, you'll want to at a minimum update the
+index cache when you do this, and especially with other people's
+repositories you often want to make sure that the index cache is in some
+known state (you don't know _what_ they've done and not yet checked in),
+so usually you'll precede the "git-update-cache" with a
+
+ git-read-tree --reset HEAD
+ git-update-cache --refresh
+
+which will force a total index re-build from the tree pointed to by HEAD
+(it resets the index contents to HEAD, and then the git-update-cache
+makes sure to match up all index entries with the checked-out files).
+
+The above can also be written as simply
+
+ git reset
+
+and in fact a lot of the common git command combinations can be scripted
+with the "git xyz" interfaces, and you can learn things by just looking
+at what the git-*-script scripts do ("git reset" is the above two lines
+implemented in "git-reset-script", but some things like "git status" and
+"git commit" are slightly more complex scripts around the basic git
+commands).
+
+NOTE! Many (most?) public remote repositories will not contain any of
+the checked out files or even an index file, and will _only_ contain the
+actual core git files. Such a repository usually doesn't even have the
+".git" subdirectory, but has all the git files directly in the
+repository.
+
+To create your own local live copy of such a "raw" git repository, you'd
+first create your own subdirectory for the project, and then copy the
+raw repository contents into the ".git" directory. For example, to
+create your own copy of the git repository, you'd do the following
+
+ mkdir my-git
+ cd my-git
+ rsync -rL rsync://rsync.kernel.org/pub/scm/git/git.git/ .git
+
+followed by
+
+ git-read-tree HEAD
+
+to populate the index. However, now you have populated the index, and
+you have all the git internal files, but you will notice that you don't
+actually have any of the _working_directory_ files to work on. To get
+those, you'd check them out with
+
+ git-checkout-cache -u -a
+
+where the "-u" flag means that you want the checkout to keep the index
+up-to-date (so that you don't have to refresh it afterward), and the
+"-a" flag means "check out all files" (if you have a stale copy or an
+older version of a checked out tree you may also need to add the "-f"
+flag first, to tell git-checkout-cache to _force_ overwriting of any old
+files).
+
+Again, this can all be simplified with
+
+ git clone rsync://rsync.kernel.org/pub/scm/git/git.git/ my-git
+ cd my-git
+ git checkout
+
+which will end up doing all of the above for you.
+
+You have now successfully copied somebody else's (mine) remote
+repository, and checked it out.
+
+
+ Creating a new branch
+ ---------------------
+
+Branches in git are really nothing more than pointers into the git
+object space from within the ".git/refs/" subdirectory, and as we
+already discussed, the HEAD branch is nothing but a symlink to one of
+these object pointers.
+
+You can at any time create a new branch by just picking an arbitrary
+point in the project history, and just writing the SHA1 name of that
+object into a file under .git/refs/heads/. You can use any filename you
+want (and indeed, subdirectories), but the convention is that the
+"normal" branch is called "master". That's just a convention, though,
+and nothing enforces it.
+
+To show that as an example, let's go back to the git-tutorial archive we
+used earlier, and create a branch in it. You do that by simply just
+saying that you want to check out a new branch:
+
+ git checkout -b mybranch
+
+will create a new branch based at the current HEAD position, and switch
+to it.
+
+[ Side note: if you make the decision to start your new branch at some
+ other point in the history than the current HEAD, you can do so by
+ just telling "git checkout" what the base of the checkout would be.
+ In other words, if you have an earlier tag or branch, you'd just do
+
+ git checkout -b mybranch earlier-branch
+
+ and it would create the new branch "mybranch" at the earlier point,
+ and check out the state at that time. ]
+
+You can always just jump back to your original "master" branch by doing
+
+ git checkout master
+
+(or any other branch-name, for that matter) and if you forget which
+branch you happen to be on, a simple
+
+ ls -l .git/HEAD
+
+will tell you where it's pointing.
+
+NOTE! Sometimes you may wish to create a new branch _without_ actually
+checking it out and switching to it. If so, just use the command
+
+ git branch <branchname> [startingpoint]
+
+which will simply _create_ the branch, but will not do anything further.
+You can then later - once you decide that you want to actually develop
+on that branch - switch to that branch with a regular "git checkout"
+with the branchname as the argument.
+
+
+ Merging two branches
+ --------------------
+
+One of the ideas of having a branch is that you do some (possibly
+experimental) work in it, and eventually merge it back to the main
+branch. So assuming you created the above "mybranch" that started out
+being the same as the original "master" branch, let's make sure we're in
+that branch, and do some work there.
+
+ git checkout mybranch
+ echo "Work, work, work" >> a
+ git commit a
+
+Here, we just added another line to "a", and we used a shorthand for
+both doing a "git-update-cache a" and "git commit" by just giving the
+filename directly to "git commit".
+
+Now, to make it a bit more interesting, let's assume that somebody else
+does some work in the original branch, and simulate that by going back
+to the master branch, and editing the same file differently there:
+
+ git checkout master
+
+Here, take a moment to look at the contents of "a", and notice how they
+don't contain the work we just did in "mybranch" - because that work
+hasn't happened in the "master" branch at all. Then do
+
+ echo "Play, play, play" >> a
+ echo "Lots of fun" >> b
+ git commit a b
+
+since the master branch is obviously in a much better mood.
+
+Now, you've got two branches, and you decide that you want to merge the
+work done. Before we do that, let's introduce a cool graphical tool that
+helps you view what's going on:
+
+ gitk --all
+
+will show you graphically both of your branches (that's what the "--all"
+means: normally it will just show you your current HEAD) and their
+histories. You can also see exactly how they came to be from a common
+source.
+
+Anyway, let's exit gitk (^Q or the File menu), and decide that we want
+to merge the work we did on the "mybranch" branch into the "master"
+branch (which is currently our HEAD too). To do that, there's a nice
+script called "git resolve", which wants to know which branches you want
+to resolve and what the merge is all about:
+
+ git resolve HEAD mybranch "Merge work in mybranch"
+
+where the third argument is going to be used as the commit message if
+the merge can be resolved automatically.
+
+Now, in this case we've intentionally created a situation where the
+merge will need to be fixed up by hand, though, so git will do as much
+of it as it can automatically (which in this case is just merge the "b"
+file, which had no differences in the "mybranch" branch), and say:
+
+ Simple merge failed, trying Automatic merge
+ Auto-merging a.
+ merge: warning: conflicts during merge
+ ERROR: Merge conflict in a.
+ fatal: merge program failed
+ Automatic merge failed, fix up by hand
+
+which is way too verbose, but it basically tells you that it failed the
+really trivial merge ("Simple merge") and did an "Automatic merge"
+instead, but that too failed due to conflicts in "a".
+
+Not to worry. It left the (trivial) conflict in "a" in the same form you
+should already be well used to if you've ever used CVS, so let's just
+open "a" in our editor (whatever that may be), and fix it up somehow.
+I'd suggest just making it so that "a" contains all four lines:
+
+ Hello World
+ It's a new day for git
+ Play, play, play
+ Work, work, work
+
+and once you're happy with your manual merge, just do a
+
+ git commit a
+
+which will very loudly warn you that you're now committing a merge
+(which is correct, so never mind), and you can write a small merge
+message about your adventures in git-merge-land.
+
+After you're done, start up "gitk --all" to see graphically what the
+history looks like. Notice that "mybranch" still exists, and you can
+switch to it, and continue to work with it if you want to. The
+"mybranch" branch will not contain the merge, but next time you merge it
+from the "master" branch, git will know how you merged it, so you'll not
+have to do _that_ merge again.
+
+
+ Merging external work
+ ---------------------
+
+It's usually much more common that you merge with somebody else than
+merging with your own branches, so it's worth pointing out that git
+makes that very easy too, and in fact, it's not that different from
+doing a "git resolve". In fact, a remote merge ends up being nothing
+more than "fetch the work from a remote repository into a temporary tag"
+followed by a "git resolve".
+
+It's such a common thing to do that it's called "git pull", and you can
+simply do
+
+ git pull <remote-repository>
+
+and optionally give a branch-name for the remote end as a second
+argument.
+
+The "remote" repository can even be on the same machine. One of
+the following notations can be used to name the repository to
+pull from:
+
+ Rsync URL
+ rsync://remote.machine/path/to/repo.git/
+
+ HTTP(s) URL
+ http://remote.machine/path/to/repo.git/
+
+ GIT URL
+ git://remote.machine/path/to/repo.git/
+ remote.machine:/path/to/repo.git/
+
+ Local directory
+ /path/to/repo.git/
+
+[ Side Note: currently, HTTP transport is slightly broken in
+ that when the remote repository is "packed" it does not always
+ work. But we have not talked about packing repositories yet, so
+ let's not worry too much about it for now. ]
+
+[ Digression: you could do without using any branches at all, by
+ keeping as many local repositories as you would like to have
+ branches, and merging between them with "git pull", just like
+ you merge between branches. The advantage of this approach is
+ that it lets you keep a set of files for each "branch" checked
+ out and you may find it easier to switch back and forth if you
+ juggle multiple lines of development simultaneously. Of
+ course, you will pay the price of more disk usage to hold
+ multiple working trees, but disk space is cheap these days. ]
+
+It is likely that you will be pulling from the same remote
+repository from time to time. As a shorthand, you can store
+the remote repository URL in a file under .git/branches/
+directory, like this:
+
+ mkdir -p .git/branches
+ echo rsync://kernel.org/pub/scm/git/git.git/ \
+ >.git/branches/linus
+
+and use the filename to "git pull" instead of the full URL.
+The contents of a file under .git/branches can even be a prefix
+of a full URL, like this:
+
+ echo rsync://kernel.org/pub/.../jgarzik/ \
+ >.git/branches/jgarzik
+
+Examples.
+
+ (1) git pull linus
+ (2) git pull linus tag v0.99.1
+ (3) git pull jgarzik/netdev-2.6.git/ e100
+
+the above are equivalent to:
+
+ (1) git pull rsync://kernel.org/pub/scm/git/git.git/ HEAD
+ (2) git pull rsync://kernel.org/pub/scm/git/git.git/ tag v0.99.1
+ (3) git pull rsync://kernel.org/pub/.../jgarzik/netdev-2.6.git e100
+
+
+ Publishing your work
+ --------------------
+
+So we can use somebody else's work from a remote repository; but
+how can _you_ prepare a repository to let other people pull from
+it?
+
+You do your real work in your working directory that has your
+primary repository hanging under it as its ".git" subdirectory.
+You _could_ make that repository accessible remotely and ask
+people to pull from it, but in practice that is not the way
+things are usually done. A recommended way is to have a public
+repository, make it reachable by other people, and when the
+changes you made in your primary working directory are in good
+shape, update the public repository from it. This is often
+called "pushing".
+
+[ Side note: this public repository could further be mirrored,
+ and that is how kernel.org git repositories are done. ]
+
+Publishing the changes from your local (private) repository to
+your remote (public) repository requires a write privilege on
+the remote machine. You need to have an SSH account there to
+run a single command, "git-receive-pack".
+
+First, you need to create an empty repository on the remote
+machine that will house your public repository. This empty
+repository will be populated and be kept up-to-date by pushing
+into it later. Obviously, this repository creation needs to be
+done only once.
+
+[ Digression: "git push" uses a pair of programs,
+ "git-send-pack" on your local machine, and "git-receive-pack"
+ on the remote machine. The communication between the two over
+ the network internally uses an SSH connection. ]
+
+Your private repository's GIT directory is usually .git, but
+your public repository is often named after the project name,
+i.e. "<project>.git". Let's create such a public repository for
+project "my-git". After logging into the remote machine, create
+an empty directory:
+
+ mkdir my-git.git
+
+Then, make that directory into a GIT repository by running
+git-init-db, but this time, since its name is not the usual
+".git", we do things slightly differently:
+
+ GIT_DIR=my-git.git git-init-db
+
+Make sure this directory is reachable, via the transport of your
+choice, by the people you want to pull your changes. Also
+you need to make sure that you have the "git-receive-pack"
+program on the $PATH.
+
+[ Side note: many installations of sshd do not invoke your shell
+ as the login shell when you directly run programs; what this
+ means is that if your login shell is bash, only .bashrc is
+ read and not .bash_profile. As a workaround, make sure
+ .bashrc sets up $PATH so that you can run 'git-receive-pack'
+ program. ]
+
+Your "public repository" is now ready to accept your changes.
+Come back to the machine that has your private repository. From
+there, run this command:
+
+ git push <public-host>:/path/to/my-git.git master
+
+This synchronizes your public repository to match the named
+branch head (i.e. "master" in this case) and objects reachable
+from them in your current repository.
+
+As a real example, this is how I update my public git
+repository. Kernel.org mirror network takes care of the
+propagation to other publicly visible machines:
+
+ git push master.kernel.org:/pub/scm/git/git.git/
+
+
+[ Digression: your GIT "public" repository people can pull from
+ is different from a public CVS repository that gives read-write
+ access to multiple developers. It is a copy of _your_ primary
+ repository published for others to use, and you should not
+ push into it from more than one repository (this means, not
+ just disallowing other developers to push into it, but also
+ you should push into it from a single repository of yours).
+ Sharing the result of work done by multiple people is always
+ done by pulling (i.e. fetching and merging) from public
+ repositories of those people. Typically this is done by the
+ "project lead" person, and the resulting repository is
+ published as the public repository of the "project lead" for
+ everybody to base further changes on. ]
+
+
+ Packing your repository
+ -----------------------
+
+Earlier, we saw that one file under .git/objects/??/ directory
+is stored for each git object you create. This representation
+is convenient and efficient to create atomically and safely, but
+not so to transport over the network. Since git objects are
+immutable once they are created, there is a way to optimize the
+storage by "packing them together". The command
+
+ git repack
+
+will do it for you. If you followed the tutorial examples, you
+would have accumulated about 17 objects in .git/objects/??/
+directories by now. "git repack" tells you how many objects it
+packed, and stores the packed file in .git/objects/pack
+directory.
+
+[ Side Note: you will see two files, pack-*.pack and pack-*.idx,
+ in .git/objects/pack directory. They are closely related to
+ each other, and if you ever copy them by hand to a different
+ repository for whatever reason, you should make sure you copy
+ them together. The former holds all the data from the objects
+ in the pack, and the latter holds the index for random
+ access. ]
+
+If you are paranoid, running "git-verify-pack" command would
+detect if you have a corrupt pack, but do not worry too much.
+Our programs are always perfect ;-).
+
+Once you have packed objects, you do not need to leave the
+unpacked objects that are contained in the pack file anymore.
+
+ git prune-packed
+
+would remove them for you.
+
+You can try running "find .git/objects -type f" before and after
+you run "git prune-packed" if you are curious.
+
+[ Side Note: as we already mentioned, "git pull" is broken for
+ some transports dealing with packed repositories right now, so
+ do not run "git prune-packed" if you plan to give "git pull"
+ access via HTTP transport for now. ]
+
+If you run "git repack" again at this point, it will say
+"Nothing to pack". Once you continue your development and
+accumulate the changes, running "git repack" again will create a
+new pack, that contains objects created since you packed your
+archive the last time. We recommend that you pack your project
+soon after the initial import (unless you are starting your
+project from scratch), and then run "git repack" every once in a
+while, depending on how active your project is.
+
+When a repository is synchronized via "git push" and "git pull",
+objects packed in the source repository are usually stored
+unpacked in the destination, unless rsync transport is used.
+
+
+ Working with Others
+ -------------------
+
+Although git is a truly distributed system, it is often
+convenient to organize your project with an informal hierarchy
+of developers. Linux kernel development is run this way. There
+is a nice illustration (page 17, "Merges to Mainline") in Randy
+Dunlap's presentation (http://tinyurl.com/a2jdg).
+
+It should be stressed that this hierarchy is purely "informal".
+There is nothing fundamental in git that enforces the "chain of
+patch flow" this hierarchy implies. You do not have to pull
+from only one remote repository.
+
+
+A recommended workflow for a "project lead" goes like this:
+
+ (1) Prepare your primary repository on your local machine. Your
+ work is done there.
+
+ (2) Prepare a public repository accessible to others.
+
+ (3) Push into the public repository from your primary
+ repository.
+
+ (4) "git repack" the public repository. This establishes a big
+ pack that contains the initial set of objects as the
+ baseline, and possibly "git prune-packed" if the transport
+ used for pulling from your repository supports packed
+ repositories.
+
+ (5) Keep working in your primary repository. Your changes
+ include modifications of your own, patches you receive via
+ e-mails, and merges resulting from pulling the "public"
+ repositories of your "subsystem maintainers".
+
+ You can repack this private repository whenever you feel
+ like.
+
+ (6) Push your changes to the public repository, and announce it
+ to the public.
+
+ (7) Every once in a while, "git repack" the public repository.
+ Go back to step (5) and continue working.
+
+
+A recommended work cycle for a "subsystem maintainer" that works
+on that project and has their own "public repository" goes like this:
+
+ (1) Prepare your work repository, by "git clone" the public
+ repository of the "project lead". The URL used for the
+ initial cloning is stored in .git/branches/origin.
+
+ (2) Prepare a public repository accessible to others.
+
+ (3) Copy over the packed files from "project lead" public
+ repository to your public repository by hand; this part is
+ currently not automated.
+
+ (4) Push into the public repository from your primary
+ repository. Run "git repack", and possibly "git
+ prune-packed" if the transport used for pulling from your
+ repository supports packed repositories.
+
+ (5) Keep working in your primary repository. Your changes
+ include modifications of your own, patches you receive via
+ e-mails, and merges resulting from pulling the "public"
+ repositories of your "project lead" and possibly your
+ "sub-subsystem maintainers".
+
+ You can repack this private repository whenever you feel
+ like.
+
+ (6) Push your changes to your public repository, and ask your
+ "project lead" and possibly your "sub-subsystem
+ maintainers" to pull from it.
+
+ (7) Every once in a while, "git repack" the public repository.
+ Go back to step (5) and continue working.
+
+
+A recommended work cycle for an "individual developer" who does
+not have a "public" repository is somewhat different. It goes
+like this:
+
+ (1) Prepare your work repository, by "git clone" the public
+ repository of the "project lead" (or a "subsystem
+ maintainer", if you work on a subsystem). The URL used for
+ the initial cloning is stored in .git/branches/origin.
+
+ (2) Do your work there. Make commits.
+
+ (3) Run "git fetch origin" from the public repository of your
+ upstream every once in a while. This does only the first
+ half of "git pull" but does not merge. The head of the
+ public repository is stored in .git/refs/heads/origin.
+
+ (4) Use "git cherry origin" to see which ones of your patches
+ were accepted, and/or use "git rebase origin" to port your
+ unmerged changes forward to the updated upstream.
+
+ (5) Use "git format-patch origin" to prepare patches for e-mail
+ submission to your upstream and send them out. Go back to
+ step (2) and continue.
+
+
+[ to be continued.. cvsimports ]
diff --git a/INSTALL b/INSTALL
new file mode 100644
index 0000000..43f2bb9
--- /dev/null
+++ b/INSTALL
@@ -0,0 +1,66 @@
+
+ Git installation
+
+Normally you can just do "make" followed by "make install", and that
+will install the git programs in your own ~/bin/ directory. If you want
+to do a global install, you can do
+
+ make prefix=/usr install
+
+(or prefix=/usr/local, of course). Some day somebody may send me an RPM
+spec file or something, and you can do "make rpm" or whatever.
+
+Issues of note:
+
+ - git normally installs a helper script wrapper called "git", which
+ conflicts with a similarly named "GNU interactive tools" program.
+
+ Tough. Either don't use the wrapper script, or delete the old GNU
+ interactive tools. None of the core git stuff needs the wrapper,
+ it's just a convenient shorthand and while it is documented in some
+ places, you can always replace "git commit" with "git-commit-script"
+ instead.
+
+ But let's face it, most of us don't have GNU interactive tools, and
+ even if we had it, we wouldn't know what it does. I don't think it
+ has been actively developed since 1997, and people have moved over to
+ graphical file managers.
+
+ - Git is reasonably self-sufficient, but does depend on a few external
+ programs and libraries:
+
+ - "zlib", the compression library. Git won't build without it.
+
+ - "openssl". The git-rev-list program uses bignum support from
+ openssl, and unless you specify otherwise, you'll also get the
+ SHA1 library from here.
+
+ If you don't have openssl, you can use one of the SHA1 libraries
+ that come with git (git includes the one from Mozilla, and has
+ its own PowerPC-optimized one too - see the Makefile), and you
+ can avoid the bignum support by excising git-rev-list support
+ for "--merge-order" (by hand).
+
+ - "libcurl" and "curl" executable. git-http-pull and
+ git-fetch-script use them. If you do not use http
+ transfer, you are probably OK if you do not have
+ them.
+
+ - "GNU diff" to generate patches. Of course, you don't _have_ to
+ generate patches if you don't want to, but let's face it, you'll
+ be wanting to. Or why did you get git in the first place?
+
+ Non-GNU versions of the diff/patch programs don't generally support
+ the unified patch format (which is the one git uses), so you
+ really do want to get the GNU one. Trust me, you will want to
+ do that even if it wasn't for git. There's no point in living
+ in the dark ages any more.
+
+ - "merge", the standard UNIX three-way merge program. It usually
+ comes with the "rcs" package on most Linux distributions, so if
+ you have a developer install you probably have it already, but a
+ "graphical user desktop" install might have left it out.
+
+ You'll only need the merge program if you do development using
+ git, and if you only use git to track other people's work you'll
+ never notice the lack of it.
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..f580576
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,227 @@
+# -DCOLLISION_CHECK if you believe that SHA1's
+# 1461501637330902918203684832716283019655932542976 hashes do not give you
+# enough guarantees about no collisions between objects ever happening.
+#
+# -DUSE_NSEC if you want git to care about sub-second file mtimes and ctimes.
+# -DUSE_STDEV if you want git to care about st_dev changing
+#
+# Note that you need some new glibc (at least >2.2.4) for this, and it will
+# BREAK YOUR LOCAL DIFFS! show-diff and anything using it will likely randomly
+# break unless your underlying filesystem supports those sub-second times
+# (my ext3 doesn't).
+GIT_VERSION=0.99.2
+
+COPTS=-O2
+CFLAGS=-g $(COPTS) -Wall
+
+prefix=$(HOME)
+bin=$(prefix)/bin
+# dest=
+
+CC=gcc
+AR=ar
+INSTALL=install
+RPMBUILD=rpmbuild
+
+#
+# sparse is architecture-neutral, which means that we need to tell it
+# explicitly what architecture to check for. Fix this up for yours..
+#
+SPARSE_FLAGS=-D__BIG_ENDIAN__ -D__powerpc__
+
+SCRIPTS=git git-apply-patch-script git-merge-one-file-script git-prune-script \
+ git-pull-script git-tag-script git-resolve-script git-whatchanged \
+ git-fetch-script git-status-script git-commit-script \
+ git-log-script git-shortlog git-cvsimport-script git-diff-script \
+ git-reset-script git-add-script git-checkout-script git-clone-script \
+ gitk git-cherry git-rebase-script git-relink-script git-repack-script \
+ git-format-patch-script git-sh-setup-script git-push-script \
+ git-branch-script git-parse-remote git-verify-tag-script \
+ git-ls-remote-script git-clone-dumb-http git-rename-script \
+ git-request-pull-script
+
+PROG= git-update-cache git-diff-files git-init-db git-write-tree \
+ git-read-tree git-commit-tree git-cat-file git-fsck-cache \
+ git-checkout-cache git-diff-tree git-rev-tree git-ls-files \
+ git-check-files git-ls-tree git-merge-base git-merge-cache \
+ git-unpack-file git-export git-diff-cache git-convert-cache \
+ git-http-pull git-ssh-push git-ssh-pull git-rev-list git-mktag \
+ git-diff-helper git-tar-tree git-local-pull git-hash-object \
+ git-get-tar-commit-id git-apply git-stripspace \
+ git-diff-stages git-rev-parse git-patch-id git-pack-objects \
+ git-unpack-objects git-verify-pack git-receive-pack git-send-pack \
+ git-prune-packed git-fetch-pack git-upload-pack git-clone-pack \
+ git-show-index git-daemon git-var git-peek-remote \
+ git-update-server-info git-show-rev-cache git-build-rev-cache
+
+all: $(PROG)
+
+install: $(PROG) $(SCRIPTS)
+ $(INSTALL) -m755 -d $(dest)$(bin)
+ $(INSTALL) $(PROG) $(SCRIPTS) $(dest)$(bin)
+
+LIB_OBJS=read-cache.o sha1_file.o usage.o object.o commit.o tree.o blob.o \
+ tag.o date.o index.o diff-delta.o patch-delta.o entry.o path.o \
+ epoch.o refs.o csum-file.o pack-check.o pkt-line.o connect.o ident.o
+LIB_FILE=libgit.a
+LIB_H=cache.h object.h blob.h tree.h commit.h tag.h delta.h epoch.h csum-file.h \
+ pack.h pkt-line.h refs.h
+
+LIB_H += rev-cache.h
+LIB_OBJS += rev-cache.o
+
+LIB_H += strbuf.h
+LIB_OBJS += strbuf.o
+
+LIB_H += quote.h
+LIB_OBJS += quote.o
+
+LIB_H += diff.h count-delta.h
+LIB_OBJS += diff.o diffcore-rename.o diffcore-pickaxe.o diffcore-pathspec.o \
+ count-delta.o diffcore-break.o diffcore-order.o
+
+LIB_OBJS += gitenv.o
+LIB_OBJS += server-info.o
+
+LIBS = $(LIB_FILE)
+LIBS += -lz
+
+ifdef MOZILLA_SHA1
+ SHA1_HEADER="mozilla-sha1/sha1.h"
+ LIB_OBJS += mozilla-sha1/sha1.o
+else
+ifdef PPC_SHA1
+ SHA1_HEADER="ppc/sha1.h"
+ LIB_OBJS += ppc/sha1.o ppc/sha1ppc.o
+else
+ SHA1_HEADER=<openssl/sha.h>
+ifeq ($(shell uname -s),Darwin)
+ LIBS += -lcrypto -lssl
+else
+ LIBS += -lcrypto
+endif
+endif
+endif
+
+CFLAGS += '-DSHA1_HEADER=$(SHA1_HEADER)'
+
+$(LIB_FILE): $(LIB_OBJS)
+ $(AR) rcs $@ $(LIB_OBJS)
+
+check:
+ for i in *.c; do sparse $(CFLAGS) $(SPARSE_FLAGS) $$i; done
+
+test-date: test-date.c date.o
+ $(CC) $(CFLAGS) -o $@ test-date.c date.o
+
+test-delta: test-delta.c diff-delta.o patch-delta.o
+ $(CC) $(CFLAGS) -o $@ $^
+
+git-%: %.c $(LIB_FILE)
+ $(CC) $(CFLAGS) -o $@ $(filter %.c,$^) $(LIBS)
+
+git-update-cache: update-cache.c
+git-diff-files: diff-files.c
+git-init-db: init-db.c
+git-write-tree: write-tree.c
+git-read-tree: read-tree.c
+git-commit-tree: commit-tree.c
+git-cat-file: cat-file.c
+git-fsck-cache: fsck-cache.c
+git-checkout-cache: checkout-cache.c
+git-diff-tree: diff-tree.c
+git-rev-tree: rev-tree.c
+git-ls-files: ls-files.c
+git-check-files: check-files.c
+git-ls-tree: ls-tree.c
+git-merge-base: merge-base.c
+git-merge-cache: merge-cache.c
+git-unpack-file: unpack-file.c
+git-export: export.c
+git-diff-cache: diff-cache.c
+git-convert-cache: convert-cache.c
+git-http-pull: http-pull.c pull.c
+git-local-pull: local-pull.c pull.c
+git-ssh-push: rsh.c
+git-ssh-pull: rsh.c pull.c
+git-rev-list: rev-list.c
+git-mktag: mktag.c
+git-diff-helper: diff-helper.c
+git-tar-tree: tar-tree.c
+git-hash-object: hash-object.c
+git-stripspace: stripspace.c
+git-diff-stages: diff-stages.c
+git-rev-parse: rev-parse.c
+git-patch-id: patch-id.c
+git-pack-objects: pack-objects.c
+git-unpack-objects: unpack-objects.c
+git-verify-pack: verify-pack.c
+git-receive-pack: receive-pack.c
+git-send-pack: send-pack.c
+git-prune-packed: prune-packed.c
+git-fetch-pack: fetch-pack.c
+git-var: var.c
+git-peek-remote: peek-remote.c
+git-update-server-info: update-server-info.c
+git-build-rev-cache: build-rev-cache.c
+git-show-rev-cache: show-rev-cache.c
+
+git-http-pull: LIBS += -lcurl
+git-rev-list: LIBS += -lssl
+
+# Library objects..
+blob.o: $(LIB_H)
+tree.o: $(LIB_H)
+commit.o: $(LIB_H)
+tag.o: $(LIB_H)
+object.o: $(LIB_H)
+read-cache.o: $(LIB_H)
+sha1_file.o: $(LIB_H)
+usage.o: $(LIB_H)
+rev-cache.o: $(LIB_H)
+strbuf.o: $(LIB_H)
+gitenv.o: $(LIB_H)
+entry.o: $(LIB_H)
+diff.o: $(LIB_H) diffcore.h
+diffcore-rename.o : $(LIB_H) diffcore.h
+diffcore-pathspec.o : $(LIB_H) diffcore.h
+diffcore-pickaxe.o : $(LIB_H) diffcore.h
+diffcore-break.o : $(LIB_H) diffcore.h
+diffcore-order.o : $(LIB_H) diffcore.h
+epoch.o: $(LIB_H)
+
+git-core.spec: git-core.spec.in Makefile
+ sed -e 's/@@VERSION@@/$(GIT_VERSION)/g' < $< > $@
+
+GIT_TARNAME=git-core-$(GIT_VERSION)
+dist: git-core.spec git-tar-tree
+ ./git-tar-tree HEAD $(GIT_TARNAME) > $(GIT_TARNAME).tar
+ @mkdir -p $(GIT_TARNAME)
+ @cp git-core.spec $(GIT_TARNAME)
+ tar rf $(GIT_TARNAME).tar $(GIT_TARNAME)/git-core.spec
+ @rm -rf $(GIT_TARNAME)
+ gzip -f -9 $(GIT_TARNAME).tar
+
+rpm: dist
+ $(RPMBUILD) -ta git-core-$(GIT_VERSION).tar.gz
+
+test: all
+ $(MAKE) -C t/ all
+
+doc:
+ $(MAKE) -C Documentation all
+
+install-tools:
+ $(MAKE) -C tools install
+
+install-doc:
+ $(MAKE) -C Documentation install
+
+clean:
+ rm -f *.o mozilla-sha1/*.o ppc/*.o $(PROG) $(LIB_FILE)
+ rm -f git-core-*.tar.gz git-core.spec
+ $(MAKE) -C tools/ clean
+ $(MAKE) -C Documentation/ clean
+
+backup: clean
+ cd .. ; tar czvf dircache.tar.gz dir-cache
diff --git a/README b/README
new file mode 100644
index 0000000..80cc279
--- /dev/null
+++ b/README
@@ -0,0 +1,482 @@
+////////////////////////////////////////////////////////////////
+
+ GIT - the stupid content tracker
+
+////////////////////////////////////////////////////////////////
+"git" can mean anything, depending on your mood.
+
+ - random three-letter combination that is pronounceable, and not
+ actually used by any common UNIX command. The fact that it is a
+ mispronunciation of "get" may or may not be relevant.
+ - stupid. contemptible and despicable. simple. Take your pick from the
+ dictionary of slang.
+ - "global information tracker": you're in a good mood, and it actually
+ works for you. Angels sing, and a light suddenly fills the room.
+ - "goddamn idiotic truckload of sh*t": when it breaks
+
+This is a stupid (but extremely fast) directory content manager. It
+doesn't do a whole lot, but what it _does_ do is track directory
+contents efficiently.
+
+There are two object abstractions: the "object database", and the
+"current directory cache" aka "index".
+
+The Object Database
+~~~~~~~~~~~~~~~~~~~
+The object database is literally just a content-addressable collection
+of objects. All objects are named by their content, which is
+approximated by the SHA1 hash of the object itself. Objects may refer
+to other objects (by referencing their SHA1 hash), and so you can
+build up a hierarchy of objects.
+
+All objects have a statically determined "type" aka "tag", which is
+determined at object creation time, and which identifies the format of
+the object (i.e. how it is used, and how it can refer to other
+objects). There are currently four different object types: "blob",
+"tree", "commit" and "tag".
+
+A "blob" object cannot refer to any other object, and is, like the tag
+implies, a pure storage object containing some user data. It is used to
+actually store the file data, i.e. a blob object is associated with some
+particular version of some file.
+
+A "tree" object is an object that ties one or more "blob" objects into a
+directory structure. In addition, a tree object can refer to other tree
+objects, thus creating a directory hierarchy.
+
+A "commit" object ties such directory hierarchies together into
+a DAG of revisions - each "commit" is associated with exactly one tree
+(the directory hierarchy at the time of the commit). In addition, a
+"commit" refers to one or more "parent" commit objects that describe the
+history of how we arrived at that directory hierarchy.
+
+As a special case, a commit object with no parents is called the "root"
+object, and is the point of an initial project commit. Each project
+must have at least one root, and while you can tie several different
+root objects together into one project by creating a commit object which
+has two or more separate roots as its ultimate parents, that's probably
+just going to confuse people. So aim for the notion of "one root object
+per project", even if git itself does not enforce that.
+
+A "tag" object symbolically identifies and can be used to sign other
+objects. It contains the identifier and type of another object, a
+symbolic name (of course!) and, optionally, a signature.
+
+Regardless of object type, all objects share the following
+characteristics: they are all deflated with zlib, and have a header
+that not only specifies their tag, but also provides size information
+about the data in the object. It's worth noting that the SHA1 hash
+that is used to name the object is the hash of the original data.
+(Historical note: in the dawn of the age of git the hash
+was the sha1 of the _compressed_ object)
+
+As a result, the general consistency of an object can always be tested
+independently of the contents or the type of the object: all objects can
+be validated by verifying that (a) their hashes match the content of the
+file and (b) the object successfully inflates to a stream of bytes that
+forms a sequence of <ascii tag without space> + <space> + <ascii decimal
+size> + <byte\0> + <binary object data>.
+
+The structured objects can further have their structure and
+connectivity to other objects verified. This is generally done with
+the "git-fsck-cache" program, which generates a full dependency graph
+of all objects, and verifies their internal consistency (in addition
+to just verifying their superficial consistency through the hash).
+
+The object types in some more detail:
+
+Blob Object
+~~~~~~~~~~~
+A "blob" object is nothing but a binary blob of data, and doesn't
+refer to anything else. There is no signature or any other
+verification of the data, so while the object is consistent (it _is_
+indexed by its sha1 hash, so the data itself is certainly correct), it
+has absolutely no other attributes. No name associations, no
+permissions. It is purely a blob of data (i.e. normally "file
+contents").
+
+In particular, since the blob is entirely defined by its data, if two
+files in a directory tree (or in multiple different versions of the
+repository) have the same contents, they will share the same blob
+object. The object is totally independent of its location in the
+directory tree, and renaming a file does not change the object that
+file is associated with in any way.
+
+A blob is typically created when link:git-update-cache.html[git-update-cache]
+is run, and its data can be accessed by link:git-cat-file.html[git-cat-file].
+
+Tree Object
+~~~~~~~~~~~
+The next hierarchical object type is the "tree" object. A tree object
+is a list of mode/name/blob data, sorted by name. Alternatively, the
+mode data may specify a directory mode, in which case instead of
+naming a blob, that name is associated with another TREE object.
+
+Like the "blob" object, a tree object is uniquely determined by the
+set contents, and so two separate but identical trees will always
+share the exact same object. This is true at all levels, i.e. it's
+true for a "leaf" tree (which does not refer to any other trees, only
+blobs) as well as for a whole subdirectory.
+
+For that reason a "tree" object is just a pure data abstraction: it
+has no history, no signatures, no verification of validity, except
+that since the contents are again protected by the hash itself, we can
+trust that the tree is immutable and its contents never change.
+
+So you can trust the contents of a tree to be valid, the same way you
+can trust the contents of a blob, but you don't know where those
+contents _came_ from.
+
+Side note on trees: since a "tree" object is a sorted list of
+"filename+content", you can create a diff between two trees without
+actually having to unpack two trees. Just ignore all common parts,
+and your diff will look right. In other words, you can effectively
+(and efficiently) tell the difference between any two random trees by
+O(n) where "n" is the size of the difference, rather than the size of
+the tree.
+
+Side note 2 on trees: since the name of a "blob" depends entirely and
+exclusively on its contents (i.e. there are no names or permissions
+involved), you can see trivial renames or permission changes by
+noticing that the blob stayed the same. However, renames with data
+changes need a smarter "diff" implementation.
+
+A tree is created with link:git-write-tree.html[git-write-tree] and
+its data can be accessed by link:git-ls-tree.html[git-ls-tree]
+
+Commit Object
+~~~~~~~~~~~~~
+The "commit" object is an object that introduces the notion of
+history into the picture. In contrast to the other objects, it
+doesn't just describe the physical state of a tree, it describes how
+we got there, and why.
+
+A "commit" is defined by the tree-object that it results in, the
+parent commits (zero, one or more) that led up to that point, and a
+comment on what happened. Again, a commit is not trusted per se:
+the contents are well-defined and "safe" due to the cryptographically
+strong signatures at all levels, but there is no reason to believe
+that the tree is "good" or that the merge information makes sense.
+The parents do not have to actually have any relationship with the
+result, for example.
+
+Note on commits: unlike real SCM's, commits do not contain
+rename information or file mode change information. All of that is
+implicit in the trees involved (the result tree, and the result trees
+of the parents), and describing that makes no sense in this idiotic
+file manager.
+
+A commit is created with link:git-commit-tree.html[git-commit-tree] and
+its data can be accessed by link:git-cat-file.html[git-cat-file]
+
+Trust
+~~~~~
+An aside on the notion of "trust". Trust is really outside the scope
+of "git", but it's worth noting a few things. First off, since
+everything is hashed with SHA1, you _can_ trust that an object is
+intact and has not been messed with by external sources. So the name
+of an object uniquely identifies a known state - just not a state that
+you may want to trust.
+
+Furthermore, since the SHA1 signature of a commit refers to the
+SHA1 signatures of the tree it is associated with and the signatures
+of the parent, a single named commit specifies uniquely a whole set
+of history, with full contents. You can't later fake any step of the
+way once you have the name of a commit.
+
+So to introduce some real trust in the system, the only thing you need
+to do is to digitally sign just _one_ special note, which includes the
+name of a top-level commit. Your digital signature shows others
+that you trust that commit, and the immutability of the history of
+commits tells others that they can trust the whole history.
+
+In other words, you can easily validate a whole archive by just
+sending out a single email that tells the people the name (SHA1 hash)
+of the top commit, and digitally sign that email using something
+like GPG/PGP.
+
+To assist in this, git also provides the tag object...
+
+Tag Object
+~~~~~~~~~~
+Git provides the "tag" object to simplify creating, managing and
+exchanging symbolic and signed tokens. The "tag" object at its
+simplest simply symbolically identifies another object by containing
+the sha1, type and symbolic name.
+
+However it can optionally contain additional signature information
+(which git doesn't care about as long as there's less than 8k of
+it). This can then be verified externally to git.
+
+Note that despite the tag features, "git" itself only handles content
+integrity; the trust framework (and signature provision and
+verification) has to come from outside.
+
+A tag is created with link:git-mktag.html[git-mktag] and
+its data can be accessed by link:git-cat-file.html[git-cat-file]
+
+
+The "index" aka "Current Directory Cache"
+-----------------------------------------
+The index is a simple binary file, which contains an efficient
+representation of a virtual directory content at some random time. It
+does so by a simple array that associates a set of names, dates,
+permissions and content (aka "blob") objects together. The cache is
+always kept ordered by name, and names are unique (with a few very
+specific rules) at any point in time, but the cache has no long-term
+meaning, and can be partially updated at any time.
+
+In particular, the index certainly does not need to be consistent with
+the current directory contents (in fact, most operations will depend on
+different ways to make the index _not_ be consistent with the directory
+hierarchy), but it has three very important attributes:
+
+'(a) it can re-generate the full state it caches (not just the
+directory structure: it contains pointers to the "blob" objects so
+that it can regenerate the data too)'
+
+As a special case, there is a clear and unambiguous one-way mapping
+from a current directory cache to a "tree object", which can be
+efficiently created from just the current directory cache without
+actually looking at any other data. So a directory cache at any one
+time uniquely specifies one and only one "tree" object (but has
+additional data to make it easy to match up that tree object with what
+has happened in the directory)
+
+'(b) it has efficient methods for finding inconsistencies between that
+cached state ("tree object waiting to be instantiated") and the
+current state.'
+
+'(c) it can additionally efficiently represent information about merge
+conflicts between different tree objects, allowing each pathname to be
+associated with sufficient information about the trees involved that
+you can create a three-way merge between them.'
+
+Those are the three ONLY things that the directory cache does. It's a
+cache, and the normal operation is to re-generate it completely from a
+known tree object, or update/compare it with a live tree that is being
+developed. If you blow the directory cache away entirely, you generally
+haven't lost any information as long as you have the name of the tree
+that it described.
+
+At the same time, the directory index is also the
+staging area for creating new trees, and creating a new tree always
+involves a controlled modification of the index file. In particular,
+the index file can have the representation of an intermediate tree that
+has not yet been instantiated. So the index can be thought of as a
+write-back cache, which can contain dirty information that has not yet
+been written back to the backing store.
+
+
+
+The Workflow
+------------
+Generally, all "git" operations work on the index file. Some operations
+work *purely* on the index file (showing the current state of the
+index), but most operations move data to and from the index file, either
+from the database or from the working directory. Thus there are four
+main combinations:
+
+1) working directory -> index
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+You update the index with information from the working directory with
+the link:git-update-cache.html[git-update-cache] command. You
+generally update the index information by just specifying the filename
+you want to update, like so:
+
+ git-update-cache filename
+
+but to avoid common mistakes with filename globbing etc, the command
+will not normally add totally new entries or remove old entries,
+i.e. it will normally just update existing cache entries.
+
+To tell git that yes, you really do realize that certain files no
+longer exist in the archive, or that new files should be added, you
+should use the "--remove" and "--add" flags respectively.
+
+NOTE! A "--remove" flag does _not_ mean that subsequent filenames will
+necessarily be removed: if the files still exist in your directory
+structure, the index will be updated with their new status, not
+removed. The only thing "--remove" means is that update-cache will be
+considering a removed file to be a valid thing, and if the file really
+does not exist any more, it will update the index accordingly.
+
+As a special case, you can also do "git-update-cache --refresh", which
+will refresh the "stat" information of each index entry to match the current
+stat information. It will _not_ update the object status itself, and
+it will only update the fields that are used to quickly test whether
+an object still matches its old backing store object.
+
+2) index -> object database
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+You write your current index file to a "tree" object with the program
+
+ git-write-tree
+
+that doesn't come with any options - it will just write out the
+current index into the set of tree objects that describe that state,
+and it will return the name of the resulting top-level tree. You can
+use that tree to re-generate the index at any time by going in the
+other direction:
+
+3) object database -> index
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+You read a "tree" file from the object database, and use that to
+populate (and overwrite - don't do this if your index contains any
+unsaved state that you might want to restore later!) your current
+index. Normal operation is just
+
+ git-read-tree <sha1 of tree>
+
+and your index file will now be equivalent to the tree that you saved
+earlier. However, that is only your _index_ file: your working
+directory contents have not been modified.
+
+4) index -> working directory
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+You update your working directory from the index by "checking out"
+files. This is not a very common operation, since normally you'd just
+keep your files updated, and rather than write to your working
+directory, you'd tell the index files about the changes in your
+working directory (i.e. "git-update-cache").
+
+However, if you decide to jump to a new version, or check out somebody
+else's version, or just restore a previous tree, you'd populate your
+index file with read-tree, and then you need to check out the result
+with
+ git-checkout-cache filename
+
+or, if you want to check out all of the index, use "-a".
+
+NOTE! git-checkout-cache normally refuses to overwrite old files, so
+if you have an old version of the tree already checked out, you will
+need to use the "-f" flag (_before_ the "-a" flag or the filename) to
+_force_ the checkout.
+
+
+Finally, there are a few odds and ends which are not purely moving
+from one representation to the other:
+
+5) Tying it all together
+~~~~~~~~~~~~~~~~~~~~~~~~
+To commit a tree you have instantiated with "git-write-tree", you'd
+create a "commit" object that refers to that tree and the history
+behind it - most notably the "parent" commits that preceded it in
+history.
+
+Normally a "commit" has one parent: the previous state of the tree
+before a certain change was made. However, sometimes it can have two
+or more parent commits, in which case we call it a "merge", due to the
+fact that such a commit brings together ("merges") two or more
+previous states represented by other commits.
+
+In other words, while a "tree" represents a particular directory state
+of a working directory, a "commit" represents that state in "time",
+and explains how we got there.
+
+You create a commit object by giving it the tree that describes the
+state at the time of the commit, and a list of parents:
+
+ git-commit-tree <tree> -p <parent> [-p <parent2> ..]
+
+and then giving the reason for the commit on stdin (either through
+redirection from a pipe or file, or by just typing it at the tty).
+
+git-commit-tree will return the name of the object that represents
+that commit, and you should save it away for later use. Normally,
+you'd commit a new "HEAD" state, and while git doesn't care where you
+save the note about that state, in practice we tend to just write the
+result to the file ".git/HEAD", so that we can always see what the
+last committed state was.
+
+6) Examining the data
+~~~~~~~~~~~~~~~~~~~~~
+
+You can examine the data represented in the object database and the
+index with various helper tools. For every object, you can use
+link:git-cat-file.html[git-cat-file] to examine details about the
+object:
+
+ git-cat-file -t <objectname>
+
+shows the type of the object, and once you have the type (which is
+usually implicit in where you find the object), you can use
+
+ git-cat-file blob|tree|commit <objectname>
+
+to show its contents. NOTE! Trees have binary content, and as a result
+there is a special helper for showing that content, called
+"git-ls-tree", which turns the binary content into a more easily
+readable form.
+
+It's especially instructive to look at "commit" objects, since those
+tend to be small and fairly self-explanatory. In particular, if you
+follow the convention of having the top commit name in ".git/HEAD",
+you can do
+
+ git-cat-file commit $(cat .git/HEAD)
+
+to see what the top commit was.
+
+7) Merging multiple trees
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Git helps you do a three-way merge, which you can expand to n-way by
+repeating the merge procedure an arbitrary number of times until you finally
+"commit" the state. The normal situation is that you'd only do one
+three-way merge (two parents), and commit it, but if you like to, you
+can do multiple parents in one go.
+
+To do a three-way merge, you need the two sets of "commit" objects
+that you want to merge, use those to find the closest common parent (a
+third "commit" object), and then use those commit objects to find the
+state of the directory ("tree" object) at these points.
+
+To get the "base" for the merge, you first look up the common parent
+of two commits with
+
+ git-merge-base <commit1> <commit2>
+
+which will return you the commit they are both based on. You should
+now look up the "tree" objects of those commits, which you can easily
+do with (for example)
+
+ git-cat-file commit <commitname> | head -1
+
+since the tree object information is always the first line in a commit
+object.
+
+Once you know the three trees you are going to merge (the one
+"original" tree, aka the common case, and the two "result" trees, aka
+the branches you want to merge), you do a "merge" read into the
+index. This will throw away your old index contents, so you should
+make sure that you've committed those - in fact you would normally
+always do a merge against your last commit (which should thus match
+what you have in your current index anyway).
+
+To do the merge, do
+
+ git-read-tree -m <origtree> <target1tree> <target2tree>
+
+which will do all trivial merge operations for you directly in the
+index file, and you can just write the result out with
+"git-write-tree".
+
+NOTE! Because the merge is done in the index file, and not in your
+working directory, your working directory will no longer match your
+index. You can use "git-checkout-cache -f -a" to make the effect of
+the merge be seen in your working directory.
+
+NOTE2! Sadly, many merges aren't trivial. If there are files that have
+been added, moved or removed, or if both branches have modified the
+same file, you will be left with an index tree that contains "merge
+entries" in it. Such an index tree can _NOT_ be written out to a tree
+object, and you will have to resolve any such merge clashes using
+other tools before you can write out the result.
+
+
+[ fixme: talk about resolving merges here ]
diff --git a/apply.c b/apply.c
new file mode 100644
index 0000000..c671d9e
--- /dev/null
+++ b/apply.c
@@ -0,0 +1,1516 @@
+/*
+ * apply.c
+ *
+ * Copyright (C) Linus Torvalds, 2005
+ *
+ * This applies patches on top of some (arbitrary) version of the SCM.
+ *
+ * NOTE! It does all its work in the index file, and only cares about
+ * the files in the working directory if you tell it to "merge" the
+ * patch apply.
+ *
+ * Even when merging it always takes the source from the index, and
+ * uses the working tree as a "branch" for a 3-way merge.
+ */
+#include <ctype.h>
+#include <fnmatch.h>
+#include "cache.h"
+
+// We default to the merge behaviour, since that's what most people would
+// expect.
+//
+// --check turns on checking that the working tree matches the
+// files that are being modified, but doesn't apply the patch
+// --stat does just a diffstat, and doesn't actually apply
+// --show-files shows the directory changes
+//
+static int merge_patch = 1;
+static int check_index = 0;
+static int write_index = 0;
+static int diffstat = 0;
+static int summary = 0;
+static int check = 0;
+static int apply = 1;
+static int show_files = 0;
+static const char apply_usage[] =
+"git-apply [--no-merge] [--stat] [--summary] [--check] [--index] [--apply] [--show-files] <patch>...";
+
+/*
+ * For "diff-stat" like behaviour, we keep track of the biggest change
+ * we've seen, and the longest filename. That allows us to do simple
+ * scaling.
+ */
+static int max_change, max_len;
+
+/*
+ * Various "current state", notably line numbers and what
+ * file (and how) we're patching right now.. The "is_xxxx"
+ * things are flags, where -1 means "don't know yet".
+ */
+static int linenr = 1;
+
+/*
+ * One piece of a patch, chained to the next one through "next".
+ * NOTE(review): the code that fills these fields in is not shown here;
+ * the field descriptions below are inferred from the names -- confirm.
+ */
+struct fragment {
+ unsigned long oldpos, oldlines; /* presumably position and line count on the old side */
+ unsigned long newpos, newlines; /* presumably position and line count on the new side */
+ const char *patch; /* presumably points at the fragment text */
+ int size; /* presumably the size of that text */
+ struct fragment *next;
+};
+
+/*
+ * Everything we learn about one file from the patch headers.
+ */
+struct patch {
+ char *new_name, *old_name, *def_name; /* def_name comes from the "diff --git" line */
+ unsigned int old_mode, new_mode; /* parsed as octal from the mode header lines */
+ int is_rename, is_copy, is_new, is_delete; /* flags set by the header line handlers */
+ int lines_added, lines_deleted; /* presumably the diffstat counters -- confirm */
+ int score; /* value of the similarity/dissimilarity index line */
+ struct fragment *fragments; /* presumably the list of fragments for this file */
+ char *result; /* NOTE(review): presumably the patched contents -- confirm */
+ unsigned long resultsize; /* presumably the size of "result" */
+ struct patch *next;
+};
+
+#define CHUNKSIZE (8192)
+#define SLOP (16)
+
+/*
+ * Read everything that can be read from "fd" into a freshly allocated
+ * buffer and return it; the caller owns the buffer.
+ *
+ * The number of bytes read is stored in *sizep.  The data is followed
+ * by SLOP NUL bytes that are not counted in *sizep.
+ *
+ * Transient read failures (EAGAIN, EINTR) are retried; any other read
+ * error is fatal.
+ */
+static void *read_patch_file(int fd, unsigned long *sizep)
+{
+	unsigned long size = 0, alloc = CHUNKSIZE;
+	/* char pointer: arithmetic on "void *" is a GNU extension */
+	char *buffer = xmalloc(alloc);
+
+	for (;;) {
+		ssize_t nr;
+
+		/* grow the buffer before the free tail gets too small */
+		if (alloc - size < 1024) {
+			alloc += CHUNKSIZE;
+			buffer = xrealloc(buffer, alloc);
+		}
+		nr = read(fd, buffer + size, alloc - size);
+		if (!nr)
+			break;
+		if (nr < 0) {
+			/* an interrupted read is no more fatal than EAGAIN */
+			if (errno == EAGAIN || errno == EINTR)
+				continue;
+			die("git-apply: read returned %s", strerror(errno));
+		}
+		size += nr;
+	}
+	*sizep = size;
+
+	/*
+	 * Make sure that we have some slop in the buffer
+	 * so that we can do speculative "memcmp" etc, and
+	 * see to it that it is NUL-filled.
+	 */
+	if (alloc < size + SLOP)
+		buffer = xrealloc(buffer, size + SLOP);
+	memset(buffer + size, 0, SLOP);
+	return buffer;
+}
+
+/*
+ * Length of the first line in "buffer", including its terminating
+ * newline if there is one.  At most "size" bytes are examined, so a
+ * buffer without a newline yields "size".
+ */
+static unsigned long linelen(const char *buffer, unsigned long size)
+{
+	unsigned long pos;
+
+	for (pos = 0; pos < size; pos++) {
+		if (buffer[pos] == '\n')
+			return pos + 1;
+	}
+	return size;
+}
+
+/*
+ * Does "str" start with "/dev/null" followed by whitespace?
+ *
+ * The byte after the name is converted to unsigned char before it is
+ * handed to isspace(): passing a negative plain char is undefined.
+ */
+static int is_dev_null(const char *str)
+{
+	return !memcmp("/dev/null", str, 9) && isspace((unsigned char)str[9]);
+}
+
+#define TERM_SPACE 1
+#define TERM_TAB 2
+
+/*
+ * Decide whether the whitespace character "c" ends a filename.
+ *
+ * A space only terminates when TERM_SPACE is set in "terminate", a tab
+ * only when TERM_TAB is set; any other character always terminates.
+ * "name" and "namelen" are currently not looked at.
+ */
+static int name_terminate(const char *name, int namelen, int c, int terminate)
+{
+	switch (c) {
+	case ' ':
+		return (terminate & TERM_SPACE) != 0;
+	case '\t':
+		return (terminate & TERM_TAB) != 0;
+	default:
+		return 1;
+	}
+}
+
+/*
+ * Extract a filename from "line".
+ *
+ * The name ends at the newline, at a NUL, or at whitespace that
+ * name_terminate() accepts for the given "terminate" flags.  The text
+ * up to and including the p_value'th '/' is dropped; a p_value of 0
+ * keeps the name as it is.
+ *
+ * "def" is a previously found, heap-allocated candidate (or NULL).  It
+ * is returned unchanged when no name is found, or when it is a proper
+ * prefix of the new name (we prefer "file" over "file.orig" or
+ * "file~").  Otherwise "def" is freed and a newly allocated name is
+ * returned.
+ */
+static char * find_name(const char *line, char *def, int p_value, int terminate)
+{
+	const char *start = line;
+	char *name;
+	int len;
+
+	for (;;) {
+		char c = *line;
+
+		/*
+		 * Stop at the end of the line.  Also stop at a NUL so that
+		 * an unterminated last line cannot make us walk off the
+		 * end of the NUL-padded patch buffer.
+		 */
+		if (c == '\n' || c == '\0')
+			break;
+		if (isspace((unsigned char)c) &&
+		    name_terminate(start, line - start, c, terminate))
+			break;
+		line++;
+		if (c == '/' && !--p_value)
+			start = line;
+	}
+	len = line - start;
+	if (!len)
+		return def;
+
+	/*
+	 * Generally we prefer the shorter name, especially
+	 * if the other one is just a variation of that with
+	 * something else tacked on to the end (ie "file.orig"
+	 * or "file~").
+	 */
+	if (def) {
+		int deflen = strlen(def);
+		if (deflen < len && !strncmp(start, def, deflen))
+			return def;
+	}
+
+	name = xmalloc(len + 1);
+	memcpy(name, start, len);
+	name[len] = 0;
+	free(def);
+	return name;
+}
+
+/*
+ * Fill in the names (and the new/delete flags) of "patch" from the
+ * "--- " and "+++ " lines of a traditional patch header.
+ *
+ * A /dev/null on the old side means the file is created, a /dev/null
+ * on the new side means it is deleted; otherwise both sides name the
+ * same file and the flags are left alone.
+ *
+ * NOTE! This hardcodes "-p1" behaviour in filename detection.
+ *
+ * FIXME! The end-of-filename heuristics are kind of screwy. For existing
+ * files, we can happily check the index for a match, but for creating a
+ * new file we should try to match whatever "patch" does. I have no idea.
+ */
+static void parse_traditional_patch(const char *first, const char *second, struct patch *patch)
+{
+	const int strip = 1;
+	const int stop_at = TERM_SPACE | TERM_TAB;
+	const char *old_side = first + 4;	/* past "--- " */
+	const char *new_side = second + 4;	/* past "+++ " */
+	char *name;
+
+	if (is_dev_null(old_side)) {
+		patch->is_new = 1;
+		patch->is_delete = 0;
+		name = find_name(new_side, NULL, strip, stop_at);
+		patch->new_name = name;
+	} else if (is_dev_null(new_side)) {
+		patch->is_new = 0;
+		patch->is_delete = 1;
+		name = find_name(old_side, NULL, strip, stop_at);
+		patch->old_name = name;
+	} else {
+		/* let the second line refine what the first one gave us */
+		char *candidate = find_name(old_side, NULL, strip, stop_at);
+
+		name = find_name(new_side, candidate, strip, stop_at);
+		patch->new_name = name;
+		patch->old_name = name;
+	}
+
+	if (!name)
+		die("unable to find filename in patch at line %d", linenr);
+}
+
+/*
+ * Handler for a line starting with "@@ -": the extended header is over.
+ * The negative return value makes parse_git_header() stop and return.
+ */
+static int gitdiff_hdrend(const char *line, struct patch *patch)
+{
+ return -1;
+}
+
+/*
+ * We're anal about diff header consistency, to make
+ * sure that we don't end up having strange ambiguous
+ * patches floating around.
+ *
+ * As a result, gitdiff_{old|new}name() will check
+ * their names against any previous information, just
+ * to make sure..
+ *
+ * "line" is the text after "--- " or "+++ ", "isnull" says that the
+ * header told us this side must be /dev/null, "orig_name" is the name
+ * we already have for this side (or NULL) and "oldnew" is only used in
+ * the error message.
+ *
+ * Returns a newly found name when there was nothing to check against,
+ * otherwise "orig_name" (which is NULL in the /dev/null case).  Dies
+ * on any inconsistency.
+ */
+static char *gitdiff_verify_name(const char *line, int isnull, char *orig_name, const char *oldnew)
+{
+ int len;
+ const char *name;
+
+ /* Nothing known yet: just take the name, minus its first component */
+ if (!orig_name && !isnull)
+ return find_name(line, NULL, 1, 0);
+
+ /* What we expect to see: /dev/null, or the name we already have */
+ name = "/dev/null";
+ len = 9;
+ if (orig_name) {
+ name = orig_name;
+ len = strlen(name);
+ if (isnull)
+ die("git-apply: bad git-diff - expected /dev/null, got %s on line %d", name, linenr);
+ }
+
+ /* An absolute expected name is compared without skipping anything */
+ if (*name == '/')
+ goto absolute_path;
+
+ /* Otherwise skip up to and including the first '/' on the line */
+ for (;;) {
+ char c = *line++;
+ if (c == '\n')
+ break;
+ if (c != '/')
+ continue;
+absolute_path:
+ /* The rest of the line must be exactly the expected name */
+ if (memcmp(line, name, len) || line[len] != '\n')
+ break;
+ return orig_name;
+ }
+ die("git-apply: bad git-diff - inconsistent %s filename on line %d", oldnew, linenr);
+ return NULL;
+}
+
+/*
+ * Handler for the "--- " line: verify it against what we already know
+ * about the old side and record the resulting old name.
+ */
+static int gitdiff_oldname(const char *line, struct patch *patch)
+{
+	char *verified;
+
+	verified = gitdiff_verify_name(line, patch->is_new, patch->old_name, "old");
+	patch->old_name = verified;
+	return 0;
+}
+
+/*
+ * Handler for the "+++ " line: verify it against what we already know
+ * about the new side and record the resulting new name.
+ */
+static int gitdiff_newname(const char *line, struct patch *patch)
+{
+	char *verified;
+
+	verified = gitdiff_verify_name(line, patch->is_delete, patch->new_name, "new");
+	patch->new_name = verified;
+	return 0;
+}
+
+/*
+ * Handler for "old mode ": the rest of the line is the octal mode of
+ * the old file.
+ */
+static int gitdiff_oldmode(const char *line, struct patch *patch)
+{
+	unsigned long mode = strtoul(line, NULL, 8);
+
+	patch->old_mode = mode;
+	return 0;
+}
+
+/*
+ * Handler for "new mode ": the rest of the line is the octal mode of
+ * the new file.
+ */
+static int gitdiff_newmode(const char *line, struct patch *patch)
+{
+	unsigned long mode = strtoul(line, NULL, 8);
+
+	patch->new_mode = mode;
+	return 0;
+}
+
+/*
+ * Handler for "deleted file mode ": the old name defaults to the name
+ * from the "diff --git" line, and the rest of the line is the old mode.
+ */
+static int gitdiff_delete(const char *line, struct patch *patch)
+{
+	patch->old_name = patch->def_name;
+	patch->is_delete = 1;
+
+	return gitdiff_oldmode(line, patch);
+}
+
+/*
+ * Handler for "new file mode ": the new name defaults to the name
+ * from the "diff --git" line, and the rest of the line is the new mode.
+ */
+static int gitdiff_newfile(const char *line, struct patch *patch)
+{
+	patch->new_name = patch->def_name;
+	patch->is_new = 1;
+
+	return gitdiff_newmode(line, patch);
+}
+
+/*
+ * Handler for "copy from ": the rest of the line is the source name,
+ * taken as it is (nothing stripped, spaces and tabs allowed).
+ */
+static int gitdiff_copysrc(const char *line, struct patch *patch)
+{
+	char *source = find_name(line, NULL, 0, 0);
+
+	patch->old_name = source;
+	patch->is_copy = 1;
+	return 0;
+}
+
+/*
+ * Handler for "copy to ": the rest of the line is the destination
+ * name, taken as it is (nothing stripped, spaces and tabs allowed).
+ */
+static int gitdiff_copydst(const char *line, struct patch *patch)
+{
+	char *destination = find_name(line, NULL, 0, 0);
+
+	patch->new_name = destination;
+	patch->is_copy = 1;
+	return 0;
+}
+
+/*
+ * Handler for "rename old " / "rename from ": the rest of the line is
+ * the source name, taken as it is (nothing stripped, spaces and tabs
+ * allowed).
+ */
+static int gitdiff_renamesrc(const char *line, struct patch *patch)
+{
+	char *source = find_name(line, NULL, 0, 0);
+
+	patch->old_name = source;
+	patch->is_rename = 1;
+	return 0;
+}
+
+/*
+ * Handler for "rename new " / "rename to ": the rest of the line is
+ * the destination name, taken as it is (nothing stripped, spaces and
+ * tabs allowed).
+ */
+static int gitdiff_renamedst(const char *line, struct patch *patch)
+{
+	char *destination = find_name(line, NULL, 0, 0);
+
+	patch->new_name = destination;
+	patch->is_rename = 1;
+	return 0;
+}
+
+/*
+ * Handler for "similarity index ": record the number as the score.
+ *
+ * The value is range-checked while it is still an unsigned long.
+ * Anything that does not fit the int score field (including the
+ * ULONG_MAX that strtoul() returns on overflow) is recorded as 0
+ * instead of being silently truncated into a bogus or negative score.
+ */
+static int gitdiff_similarity(const char *line, struct patch *patch)
+{
+	unsigned long score = strtoul(line, NULL, 10);
+
+	patch->score = (score > INT_MAX) ? 0 : (int)score;
+	return 0;
+}
+
+/*
+ * Handler for "dissimilarity index ": record the number as the score.
+ *
+ * The value is range-checked while it is still an unsigned long.
+ * Anything that does not fit the int score field (including the
+ * ULONG_MAX that strtoul() returns on overflow) is recorded as 0
+ * instead of being silently truncated into a bogus or negative score.
+ */
+static int gitdiff_dissimilarity(const char *line, struct patch *patch)
+{
+	unsigned long score = strtoul(line, NULL, 10);
+
+	patch->score = (score > INT_MAX) ? 0 : (int)score;
+	return 0;
+}
+
+/*
+ * This is normal for a diff that doesn't change anything: we'll fall through
+ * into the next diff. Tell the parser to break out.
+ *
+ * It is the handler of the catch-all "" entry at the end of the header
+ * table, so it sees every line that no other entry matched.
+ */
+static int gitdiff_unrecognized(const char *line, struct patch *patch)
+{
+ return -1;
+}
+
+/*
+ * Find the default filename on a "diff --git" line; "line" points just
+ * past the "diff --git " prefix.
+ *
+ * The line is expected to hold two paths of the form "<dir>/<name>".
+ * We only accept a name if what follows the first '/' of both paths is
+ * exactly the same, and the second one runs to the end of the line.
+ * Names may contain spaces or tabs, so every space/tab is tried as the
+ * possible separator between the two paths.
+ *
+ * Returns a newly allocated copy of the name, or NULL if no such name
+ * can be found before the end of the line.
+ */
+static char *git_header_name(char *line)
+{
+	int len;
+	char *name, *second;
+
+	/*
+	 * Find the first '/'
+	 */
+	name = line;
+	for (;;) {
+		char c = *name++;
+		if (c == '\n' || c == '\0')
+			return NULL;
+		if (c == '/')
+			break;
+	}
+
+	/*
+	 * We don't accept absolute paths (/dev/null) as possibly valid
+	 */
+	if (name == line+1)
+		return NULL;
+
+	/*
+	 * Accept a name only if it shows up twice, exactly the same
+	 * form.
+	 */
+	for (len = 0 ; ; len++) {
+		switch (name[len]) {
+		default:
+			continue;
+		case '\n':
+		case '\0':
+			/*
+			 * End of the header line without a match.  This has
+			 * to return: a "break" would only leave the switch
+			 * and let the scan run on into the following lines.
+			 */
+			return NULL;
+		case '\t': case ' ':
+			/* skip the directory part of the second path */
+			second = name+len;
+			for (;;) {
+				char c = *second++;
+				if (c == '\n' || c == '\0')
+					return NULL;
+				if (c == '/')
+					break;
+			}
+			if (second[len] == '\n' && !memcmp(name, second, len)) {
+				char *ret = xmalloc(len + 1);
+				memcpy(ret, name, len);
+				ret[len] = 0;
+				return ret;
+			}
+			/* not the separator: this blank is part of the name */
+			break;
+		}
+	}
+}
+
+/*
+ * Verify that we recognize the lines following a git header
+ *
+ * "line" points at the "diff --git" line, which is "len" bytes long, and
+ * "size" is the number of bytes available from there.  Every following
+ * line is matched against the prefixes in optable and handed to the
+ * matching handler, which fills in "patch".
+ *
+ * Returns the offset, counted from the start of the "diff --git" line,
+ * of the line at which parsing stopped: the first line whose handler
+ * returned a negative value, the first line that is not terminated by
+ * a newline, or the end of the input.
+ */
+static int parse_git_header(char *line, int len, unsigned int size, struct patch *patch)
+{
+ unsigned long offset;
+
+ /* A git diff has explicit new/delete information, so we don't guess */
+ patch->is_new = 0;
+ patch->is_delete = 0;
+
+ /*
+ * Some things may not have the old name in the
+ * rest of the headers anywhere (pure mode changes,
+ * or removing or adding empty files), so we get
+ * the default name from the header.
+ */
+ patch->def_name = git_header_name(line + strlen("diff --git "));
+
+ /* Step over the "diff --git" line itself */
+ line += len;
+ size -= len;
+ linenr++;
+ for (offset = len ; size > 0 ; offset += len, size -= len, line += len, linenr++) {
+ /*
+ * Line prefixes and their handlers.  The first matching entry
+ * wins; the empty prefix at the end matches everything else.
+ */
+ static const struct opentry {
+ const char *str;
+ int (*fn)(const char *, struct patch *);
+ } optable[] = {
+ { "@@ -", gitdiff_hdrend },
+ { "--- ", gitdiff_oldname },
+ { "+++ ", gitdiff_newname },
+ { "old mode ", gitdiff_oldmode },
+ { "new mode ", gitdiff_newmode },
+ { "deleted file mode ", gitdiff_delete },
+ { "new file mode ", gitdiff_newfile },
+ { "copy from ", gitdiff_copysrc },
+ { "copy to ", gitdiff_copydst },
+ { "rename old ", gitdiff_renamesrc },
+ { "rename new ", gitdiff_renamedst },
+ { "rename from ", gitdiff_renamesrc },
+ { "rename to ", gitdiff_renamedst },
+ { "similarity index ", gitdiff_similarity },
+ { "dissimilarity index ", gitdiff_dissimilarity },
+ { "", gitdiff_unrecognized },
+ };
+ int i;
+
+ len = linelen(line, size);
+ /* Stop on an empty or unterminated line */
+ if (!len || line[len-1] != '\n')
+ break;
+ for (i = 0; i < sizeof(optable) / sizeof(optable[0]); i++) {
+ const struct opentry *p = optable + i;
+ int oplen = strlen(p->str);
+ if (len < oplen || memcmp(p->str, line, oplen))
+ continue;
+ /* The handler gets the text after the prefix; negative means "stop here" */
+ if (p->fn(line + oplen, patch) < 0)
+ return offset;
+ break;
+ }
+ }
+
+ return offset;
+}
+
+/*
+ * Parse an unsigned decimal number at the start of "line".
+ *
+ * On success the value is stored in *p and the number of characters
+ * consumed is returned. If "line" does not start with a digit, 0 is
+ * returned and *p is left untouched.
+ */
+static int parse_num(const char *line, unsigned long *p)
+{
+	char *ptr;
+
+	/* <ctype.h> functions are only defined for unsigned char values and EOF */
+	if (!isdigit((unsigned char)*line))
+		return 0;
+	*p = strtoul(line, &ptr, 10);
+	return ptr - line;
+}
+
+/*
+ * Parse one "start[,count]" range of a fragment header.
+ *
+ * Parsing begins at line[offset]; "len" is the length of the line. The
+ * start goes to *p1 and the count to *p2. When the ",count" part is
+ * omitted the count is 1, which is how unified diff writes a range that
+ * covers a single line. The range has to be followed by the literal
+ * string "expect".
+ *
+ * Returns the offset just past "expect", or -1 if "offset" is out of
+ * range or the text does not have the expected shape.
+ */
+static int parse_range(const char *line, int len, int offset, const char *expect,
+ unsigned long *p1, unsigned long *p2)
+{
+	int digits, ex;
+
+	if (offset < 0 || offset >= len)
+		return -1;
+	line += offset;
+	len -= offset;
+
+	digits = parse_num(line, p1);
+	if (!digits)
+		return -1;
+
+	offset += digits;
+	line += digits;
+	len -= digits;
+
+	/* "@@ -5 +5 @@" means one line at position 5, not five lines */
+	*p2 = 1;
+	if (*line == ',') {
+		digits = parse_num(line+1, p2);
+		if (!digits)
+			return -1;
+
+		offset += digits+1;
+		line += digits+1;
+		len -= digits+1;
+	}
+
+	ex = strlen(expect);
+	if (ex > len)
+		return -1;
+	if (memcmp(line, expect, ex))
+		return -1;
+
+	return offset + ex;
+}
+
+/*
+ * Parse the "@@ -a,b +c,d @@" line that opens a unified diff fragment.
+ *
+ * The old range is stored in fragment->oldpos / fragment->oldlines and
+ * the new range in fragment->newpos / fragment->newlines. Returns the
+ * offset just past the closing " @@", or -1 when the line is empty,
+ * lacks its trailing newline, or does not have the expected shape.
+ */
+static int parse_fragment_header(char *line, int len, struct fragment *fragment)
+{
+	int pos;
+
+	if (len == 0 || line[len-1] != '\n')
+		return -1;
+
+	/* The old range starts right after "@@ -" and is followed by " +" */
+	pos = parse_range(line, len, 4, " +", &fragment->oldpos, &fragment->oldlines);
+
+	/* A failed first step hands -1 on, which parse_range() rejects again */
+	return parse_range(line, len, pos, " @@", &fragment->newpos, &fragment->newlines);
+}
+
+/*
+ * Scan forward from "line" (at most "size" bytes) for the start of the
+ * next patch: either a "diff --git " header or a traditional
+ * "--- " / "+++ " pair that is followed by a "@@ -" fragment.
+ *
+ * The name, mode and flag fields of "patch" are reset first and then
+ * filled in from the header that is found. On success the offset of the
+ * header relative to "line" is returned and *hdrsize is set to the
+ * length of the header; -1 is returned if no header is found.
+ */
+static int find_header(char *line, unsigned long size, int *hdrsize, struct patch *patch)
+{
+ unsigned long offset, len;
+
+ patch->is_rename = patch->is_copy = 0;
+ patch->is_new = patch->is_delete = -1;
+ patch->old_mode = patch->new_mode = 0;
+ patch->old_name = patch->new_name = NULL;
+ for (offset = 0; size > 0; offset += len, size -= len, line += len, linenr++) {
+ unsigned long nextlen;
+
+ len = linelen(line, size);
+ if (!len)
+ break;
+
+ /* Testing this early allows us to take a few shortcuts.. */
+ if (len < 6)
+ continue;
+
+ /*
+ * Make sure we don't find any unconnected patch fragments.
+ * That's a sign that we didn't find a header, and that a
+ * patch has become corrupted/broken up.
+ */
+ if (!memcmp("@@ -", line, 4)) {
+ struct fragment dummy;
+ if (parse_fragment_header(line, len, &dummy) < 0)
+ continue;
+ /* The precision argument of "%.*s" has to be an int */
+ error("patch fragment without header at line %d: %.*s", linenr, (int)(len-1), line);
+ }
+
+ /* Everything below looks at the start of the following line */
+ if (size < len + 6)
+ break;
+
+ /*
+ * Git patch? It might not have a real patch, just a rename
+ * or mode change, so we handle that specially
+ */
+ if (!memcmp("diff --git ", line, 11)) {
+ int git_hdr_len = parse_git_header(line, len, size, patch);
+ /* Nothing recognized beyond the "diff --git " line: keep looking */
+ if (git_hdr_len <= len)
+ continue;
+ if (!patch->old_name && !patch->new_name) {
+ if (!patch->def_name)
+ die("git diff header lacks filename information (line %d)", linenr);
+ patch->old_name = patch->new_name = patch->def_name;
+ }
+ *hdrsize = git_hdr_len;
+ return offset;
+ }
+
+ /* "--- " followed by "+++ " ? */
+ if (memcmp("--- ", line, 4) || memcmp("+++ ", line + len, 4))
+ continue;
+
+ /*
+ * We only accept unified patches, so we want it to
+ * at least have "@@ -a,b +c,d @@\n", which is 14 chars
+ * minimum
+ */
+ nextlen = linelen(line + len, size - len);
+ if (size < nextlen + 14 || memcmp("@@ -", line + len + nextlen, 4))
+ continue;
+
+ /* Ok, we'll consider it a patch */
+ parse_traditional_patch(line, line+len, patch);
+ *hdrsize = len + nextlen;
+ linenr += 2;
+ return offset;
+ }
+ return -1;
+}
+
+/*
+ * Parse a unified diff. Note that this really needs
+ * to parse each fragment separately, since the only
+ * way to know the difference between a "---" that is
+ * part of a patch, and a "---" that starts the next
+ * patch is to look at the line counts..
+ *
+ * "line" points at the "@@ -" header of the fragment and "size" is the
+ * number of bytes available from there. The header is parsed into
+ * "fragment", the added/deleted line counts are accumulated into
+ * "patch", and patch->is_new / patch->is_delete are settled from the
+ * line counts if they are still undecided (negative).
+ *
+ * Returns the number of bytes the fragment occupies, -1 on a malformed
+ * fragment, or the result of error() when the fragment contradicts the
+ * new/delete state of the patch.
+ */
+static int parse_fragment(char *line, unsigned long size, struct patch *patch, struct fragment *fragment)
+{
+ int added, deleted;
+ int len = linelen(line, size), offset;
+ unsigned long oldlines, newlines;
+
+ offset = parse_fragment_header(line, len, fragment);
+ if (offset < 0)
+ return -1;
+ /* Remaining old/new lines still to be seen in the fragment body */
+ oldlines = fragment->oldlines;
+ newlines = fragment->newlines;
+
+ /* Undecided so far: no old lines means creation, no new lines means deletion */
+ if (patch->is_new < 0) {
+ patch->is_new = !oldlines;
+ if (!oldlines)
+ patch->old_name = NULL;
+ }
+ if (patch->is_delete < 0) {
+ patch->is_delete = !newlines;
+ if (!newlines)
+ patch->new_name = NULL;
+ }
+
+ if (patch->is_new != !oldlines)
+ return error("new file depends on old contents");
+ if (patch->is_delete != !newlines) {
+ if (newlines)
+ return error("deleted file still has contents");
+ fprintf(stderr, "** warning: file %s becomes empty but is not deleted\n", patch->new_name);
+ }
+
+ /* Parse the thing.. */
+ line += len;
+ size -= len;
+ linenr++;
+ added = deleted = 0;
+ for (offset = len; size > 0; offset += len, size -= len, line += len, linenr++) {
+ /* Both counts used up: the fragment is complete */
+ if (!oldlines && !newlines)
+ break;
+ len = linelen(line, size);
+ if (!len || line[len-1] != '\n')
+ return -1;
+ switch (*line) {
+ default:
+ return -1;
+ case ' ':
+ /* Context line: present in both the old and the new side */
+ oldlines--;
+ newlines--;
+ break;
+ case '-':
+ deleted++;
+ oldlines--;
+ break;
+ case '+':
+ added++;
+ newlines--;
+ break;
+ /* We allow "\ No newline at end of file" */
+ case '\\':
+ if (len < 12 || memcmp(line, "\\ No newline", 12))
+ return -1;
+ break;
+ }
+ }
+ /* If a fragment ends with an incomplete line, we failed to include
+ * it in the above loop because we hit oldlines == newlines == 0
+ * before seeing it.
+ */
+ if (12 < size && !memcmp(line, "\\ No newline", 12))
+ offset += linelen(line, size);
+
+ patch->lines_added += added;
+ patch->lines_deleted += deleted;
+ return offset;
+}
+
+/*
+ * Parse the run of consecutive fragments (lines starting with "@@ -")
+ * found at "line" and store them, in patch order, as the list headed
+ * by patch->fragments. Dies if a fragment turns out to be corrupt.
+ * Returns the number of bytes consumed.
+ */
+static int parse_single_patch(char *line, unsigned long size, struct patch *patch)
+{
+	struct fragment **tail = &patch->fragments;
+	unsigned long consumed = 0;
+
+	for (;;) {
+		struct fragment *frag;
+		int fraglen;
+
+		if (size <= 4 || memcmp(line, "@@ -", 4))
+			break;
+
+		frag = xmalloc(sizeof(*frag));
+		memset(frag, 0, sizeof(*frag));
+		fraglen = parse_fragment(line, size, patch, frag);
+		if (fraglen <= 0)
+			die("corrupt patch at line %d", linenr);
+
+		/* The fragment refers to the patch text in place */
+		frag->patch = line;
+		frag->size = fraglen;
+
+		/* Link at the tail so that fragments stay in patch order */
+		*tail = frag;
+		tail = &frag->next;
+
+		consumed += fraglen;
+		line += fraglen;
+		size -= fraglen;
+	}
+	return consumed;
+}
+
+/*
+ * Parse one complete patch from "buffer": locate its header, then
+ * parse the fragments that follow it.
+ *
+ * Returns the total number of bytes consumed (anything skipped before
+ * the header, the header itself and the fragments), or the negative
+ * result of find_header() when no header is found.
+ */
+static int parse_chunk(char *buffer, unsigned long size, struct patch *patch)
+{
+	int hdrsize, patchsize;
+	int skipped;
+	char *body;
+
+	skipped = find_header(buffer, size, &hdrsize, patch);
+	if (skipped < 0)
+		return skipped;
+
+	body = buffer + skipped + hdrsize;
+	patchsize = parse_single_patch(body, size - skipped - hdrsize, patch);
+
+	return skipped + hdrsize + patchsize;
+}
+
+/* Bar material for the diffstat graph; show_stats() prints a prefix of each */
+static const char pluses[] = "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++";
+static const char minuses[]= "----------------------------------------------------------------------";
+
+/*
+ * Print the diffstat line for one patch: the (possibly shortened) file
+ * name, the total number of changed lines, and a bar of '+' and '-'
+ * characters scaled against the globals max_len and max_change.
+ */
+static void show_stats(struct patch *patch)
+{
+ const char *prefix = "";
+ char *name = patch->new_name;
+ int len, max, add, del, total;
+
+ if (!name)
+ name = patch->old_name;
+
+ /*
+ * "scale" the filename
+ */
+ len = strlen(name);
+ max = max_len;
+ if (max > 50)
+ max = 50;
+ if (len > max) {
+ char *slash;
+ /*
+ * Too long: keep only the tail of the name, preceded by "...",
+ * and if possible start the tail at a '/'.
+ */
+ prefix = "...";
+ max -= 3;
+ name += len - max;
+ slash = strchr(name, '/');
+ if (slash)
+ name = slash;
+ }
+ /* From here on "len" is the width of the name column */
+ len = max;
+
+ /*
+ * scale the add/delete
+ */
+ max = max_change;
+ if (max + len > 70)
+ max = 70 - len;
+
+ add = patch->lines_added;
+ del = patch->lines_deleted;
+ total = add + del;
+
+ if (max_change > 0) {
+ /* Scale to "max" columns with rounding; del is derived so add + del == total */
+ total = (total * max + max_change / 2) / max_change;
+ add = (add * max + max_change / 2) / max_change;
+ del = total - add;
+ }
+ printf(" %s%-*s |%5d %.*s%.*s\n", prefix,
+ len, name, patch->lines_added + patch->lines_deleted,
+ add, pluses, del, minuses);
+}
+
+/*
+ * Read the current contents of "path" into "buf" (at most "size"
+ * bytes); "st" is the stat information of the path.
+ *
+ * For a symbolic link the link target is read with readlink(); for a
+ * regular file the file is read until end of file, until the buffer is
+ * full, or until a read error occurs. Returns the number of bytes read
+ * (possibly short after a read error, so callers compare it against the
+ * expected size), the result of error() if the file cannot be opened,
+ * or -1 for any other file type.
+ */
+static int read_old_data(struct stat *st, const char *path, void *buf, unsigned long size)
+{
+	int fd;
+	unsigned long got;
+
+	switch (st->st_mode & S_IFMT) {
+	case S_IFLNK:
+		return readlink(path, buf, size);
+	case S_IFREG:
+		fd = open(path, O_RDONLY);
+		if (fd < 0)
+			return error("unable to open %s", path);
+		got = 0;
+		for (;;) {
+			int ret = read(fd, (char *)buf + got, size - got);
+			if (ret < 0) {
+				/* Interrupted or temporarily unavailable: just retry */
+				if (errno == EAGAIN || errno == EINTR)
+					continue;
+				break;
+			}
+			if (!ret)
+				break;
+			got += ret;
+		}
+		close(fd);
+		return got;
+
+	default:
+		return -1;
+	}
+}
+
+/*
+ * Find where "fragment" (fragsize bytes) occurs in "buf" (size bytes).
+ *
+ * "line" is the 1-based line number at which the fragment is expected.
+ * That position is tried first; if the text is not there, the start of
+ * every other line is tried, alternating between the next line after
+ * and the next line before the expected one, so that the closest
+ * match wins.
+ *
+ * Returns the byte offset of the match, or -1 if there is none.
+ */
+static int find_offset(const char *buf, unsigned long size, const char *fragment, unsigned long fragsize, int line)
+{
+	int i;
+	unsigned long start, backwards, forwards;
+
+	if (fragsize > size)
+		return -1;
+
+	/* Translate the expected line number into a byte offset */
+	start = 0;
+	if (line > 1) {
+		unsigned long offset = 0;
+		i = line-1;
+		while (offset + fragsize <= size) {
+			if (buf[offset++] == '\n') {
+				start = offset;
+				if (!--i)
+					break;
+			}
+		}
+	}
+
+	/*
+	 * Exact line number? "start" may have been left one byte too
+	 * far for the fragment to fit, so check the bound before
+	 * comparing instead of reading past the end of the data.
+	 */
+	if (start + fragsize <= size && !memcmp(buf + start, fragment, fragsize))
+		return start;
+
+	/*
+	 * There's probably some smart way to do this, but I'll leave
+	 * that to the smart and beautiful people. I'm simple and stupid.
+	 *
+	 * Even steps move one line forward, odd steps one line backward.
+	 */
+	backwards = start;
+	forwards = start;
+	for (i = 0; ; i++) {
+		unsigned long try;
+
+		if (i & 1) {
+			/* "backward" */
+			if (!backwards) {
+				/* Both directions are exhausted: give up */
+				if (forwards + fragsize > size)
+					break;
+				continue;
+			}
+			do {
+				--backwards;
+			} while (backwards && buf[backwards-1] != '\n');
+			try = backwards;
+		} else {
+			/* "forward" */
+			while (forwards + fragsize <= size) {
+				if (buf[forwards++] == '\n')
+					break;
+			}
+			try = forwards;
+		}
+
+		if (try + fragsize > size)
+			continue;
+		if (memcmp(buf + try, fragment, fragsize))
+			continue;
+		return try;
+	}
+
+	return -1;
+}
+
+/* A growable in-memory image of the file that is being patched */
+struct buffer_desc {
+ char *buffer; /* start of the data */
+ unsigned long size; /* number of bytes of "buffer" in use */
+ unsigned long alloc; /* number of bytes allocated for "buffer" */
+};
+
+/*
+ * Apply a single fragment to the in-memory file image "desc".
+ *
+ * The fragment text is split into an "old" image (context and removed
+ * lines) and a "new" image (context and added lines). The old image is
+ * located in the buffer with find_offset() and replaced by the new one;
+ * the buffer is enlarged when the result does not fit.
+ *
+ * Returns 0 on success, and a negative value if the fragment contains
+ * an unexpected kind of line or its old image cannot be found.
+ */
+static int apply_one_fragment(struct buffer_desc *desc, struct fragment *frag)
+{
+	char *buf = desc->buffer;
+	const char *patch = frag->patch;
+	int offset, size = frag->size;
+	char *old = xmalloc(size);
+	char *new = xmalloc(size);
+	int oldsize = 0, newsize = 0;
+
+	while (size > 0) {
+		int len = linelen(patch, size);
+		int plen;
+
+		if (!len)
+			break;
+
+		/*
+		 * "plen" is how much of the line we should use for
+		 * the actual patch data. Normally we just remove the
+		 * first character on the line, but if the line is
+		 * followed by "\ No newline", then we also remove the
+		 * last one (which is the newline, of course).
+		 */
+		plen = len-1;
+		if (len < size && patch[len] == '\\')
+			plen--;
+		switch (*patch) {
+		case ' ':
+		case '-':
+			memcpy(old + oldsize, patch + 1, plen);
+			oldsize += plen;
+			if (*patch == '-')
+				break;
+		/* Fall-through for ' ' */
+		case '+':
+			memcpy(new + newsize, patch + 1, plen);
+			newsize += plen;
+			break;
+		case '@': case '\\':
+			/* Ignore it, we already handled it */
+			break;
+		default:
+			/* Don't leak the two scratch images on the way out */
+			offset = -1;
+			goto out;
+		}
+		patch += len;
+		size -= len;
+	}
+
+	offset = find_offset(buf, desc->size, old, oldsize, frag->newpos);
+	if (offset >= 0) {
+		int diff = newsize - oldsize;
+		unsigned long size = desc->size + diff;
+		unsigned long alloc = desc->alloc;
+
+		if (size > alloc) {
+			alloc = size + 8192;
+			desc->alloc = alloc;
+			buf = xrealloc(buf, alloc);
+			desc->buffer = buf;
+		}
+		desc->size = size;
+		/* Shift the tail that follows the old image, then drop in the new image */
+		memmove(buf + offset + newsize, buf + offset + oldsize, size - offset - newsize);
+		memcpy(buf + offset, new, newsize);
+		offset = 0;
+	}
+
+out:
+	free(old);
+	free(new);
+	return offset;
+}
+
+/*
+ * Apply every fragment of "patch", in list order, to the file image
+ * "desc". Stops at the first fragment that fails and returns the
+ * result of error(); returns 0 when all fragments applied.
+ */
+static int apply_fragments(struct buffer_desc *desc, struct patch *patch)
+{
+	struct fragment *frag;
+
+	for (frag = patch->fragments; frag; frag = frag->next) {
+		/* oldpos is filled in through an "unsigned long *", hence %lu */
+		if (apply_one_fragment(desc, frag) < 0)
+			return error("patch failed: %s:%lu", patch->old_name, frag->oldpos);
+	}
+	return 0;
+}
+
+/*
+ * Compute the result of applying "patch" in memory.
+ *
+ * If the patch has an old name, that file is read first ("st" is its
+ * stat information); otherwise the starting image is empty. On success
+ * the resulting data is stored in patch->result / patch->resultsize and
+ * 0 is returned. On failure a negative value is returned and the
+ * working buffer is released.
+ */
+static int apply_data(struct patch *patch, struct stat *st)
+{
+	struct buffer_desc desc;
+
+	desc.buffer = NULL;
+	desc.size = 0;
+	desc.alloc = 0;
+	if (patch->old_name) {
+		desc.size = st->st_size;
+		/* Leave some slack so that small additions need no realloc */
+		desc.alloc = desc.size + 8192;
+		desc.buffer = xmalloc(desc.alloc);
+		if (read_old_data(st, patch->old_name, desc.buffer, desc.alloc) != desc.size) {
+			free(desc.buffer);
+			return error("read of %s failed", patch->old_name);
+		}
+	}
+
+	if (apply_fragments(&desc, patch) < 0) {
+		/* desc.buffer is current even if a fragment reallocated it */
+		free(desc.buffer);
+		return -1;
+	}
+	patch->result = desc.buffer;
+	patch->resultsize = desc.size;
+
+	if (patch->is_delete && patch->resultsize)
+		return error("removal patch leaves file contents");
+
+	return 0;
+}
+
+/*
+ * Check that "patch" can be applied and compute its result.
+ *
+ * Verifies that the old file exists (and, with check_index, that it is
+ * in the index and matches it), that a file to be created does not
+ * exist yet, and that old and new modes agree on the file type. Modes
+ * the patch did not specify are filled in on the way. Finally the
+ * patch is applied in memory with apply_data().
+ *
+ * Returns 0 on success and the result of error() otherwise.
+ */
+static int check_patch(struct patch *patch)
+{
+ struct stat st;
+ const char *old_name = patch->old_name;
+ const char *new_name = patch->new_name;
+
+ if (old_name) {
+ int changed;
+
+ if (lstat(old_name, &st) < 0)
+ return error("%s: %s", old_name, strerror(errno));
+ if (check_index) {
+ int pos = cache_name_pos(old_name, strlen(old_name));
+ if (pos < 0)
+ return error("%s: does not exist in index", old_name);
+ changed = ce_match_stat(active_cache[pos], &st);
+ if (changed)
+ return error("%s: does not match index", old_name);
+ }
+ if (patch->is_new < 0)
+ patch->is_new = 0;
+ /* Pass the on-disk mode through create_ce_mode() before comparing with the patch */
+ st.st_mode = ntohl(create_ce_mode(st.st_mode));
+ if (!patch->old_mode)
+ patch->old_mode = st.st_mode;
+ if ((st.st_mode ^ patch->old_mode) & S_IFMT)
+ return error("%s: wrong type", old_name);
+ if (st.st_mode != patch->old_mode)
+ fprintf(stderr, "warning: %s has type %o, expected %o\n",
+ old_name, st.st_mode, patch->old_mode);
+ }
+
+ /* A path that is about to be created must not exist yet */
+ if (new_name && (patch->is_new | patch->is_rename | patch->is_copy)) {
+ if (check_index && cache_name_pos(new_name, strlen(new_name)) >= 0)
+ return error("%s: already exists in index", new_name);
+ if (!lstat(new_name, &st))
+ return error("%s: already exists in working directory", new_name);
+ if (errno != ENOENT)
+ return error("%s: %s", new_name, strerror(errno));
+ if (!patch->new_mode)
+ patch->new_mode = S_IFREG | 0644;
+ }
+
+ if (new_name && old_name) {
+ int same = !strcmp(old_name, new_name);
+ if (!patch->new_mode)
+ patch->new_mode = patch->old_mode;
+ if ((patch->old_mode ^ patch->new_mode) & S_IFMT)
+ return error("new mode (%o) of %s does not match old mode (%o)%s%s",
+ patch->new_mode, new_name, patch->old_mode,
+ same ? "" : " of ", same ? "" : old_name);
+ }
+
+ /* apply_data() looks at "st" only when the patch has an old name */
+ if (apply_data(patch, &st) < 0)
+ return error("%s: patch does not apply", old_name);
+ return 0;
+}
+
+/*
+ * Run check_patch() on every patch of the list; a failure does not
+ * stop the remaining patches from being checked. Returns the bitwise
+ * OR of all results, which is 0 only if every check returned 0.
+ */
+static int check_patch_list(struct patch *patch)
+{
+	int status = 0;
+
+	while (patch) {
+		status |= check_patch(patch);
+		patch = patch->next;
+	}
+	return status;
+}
+
+/* Print one "<c> <octal mode> <name>" line on standard output */
+static void show_file(int c, unsigned int mode, const char *name)
+{
+	fprintf(stdout, "%c %o %s\n", c, mode, name);
+}
+
+/*
+ * Print one or two lines per patch describing what happens to the
+ * files involved: '-' for a path that goes away, '+' for a path that
+ * appears, and 'M' (with "old:new" modes when they differ) for a path
+ * that is modified in place.
+ */
+static void show_file_list(struct patch *patch)
+{
+	struct patch *p;
+
+	for (p = patch; p; p = p->next) {
+		if (p->is_rename) {
+			show_file('-', p->old_mode, p->old_name);
+			show_file('+', p->new_mode, p->new_name);
+		} else if (p->is_copy || p->is_new) {
+			show_file('+', p->new_mode, p->new_name);
+		} else if (p->is_delete) {
+			show_file('-', p->old_mode, p->old_name);
+		} else if (p->old_mode && p->new_mode && p->old_mode != p->new_mode) {
+			printf("M %o:%o %s\n", p->old_mode, p->new_mode, p->old_name);
+		} else {
+			printf("M %o %s\n", p->old_mode, p->old_name);
+		}
+	}
+}
+
+/*
+ * Print the diffstat: one show_stats() line per patch, followed by a
+ * summary line with the number of files and the total insertions and
+ * deletions.
+ */
+static void stat_patch_list(struct patch *patch)
+{
+	int files = 0;
+	int adds = 0;
+	int dels = 0;
+
+	while (patch) {
+		files++;
+		adds += patch->lines_added;
+		dels += patch->lines_deleted;
+		show_stats(patch);
+		patch = patch->next;
+	}
+
+	printf(" %d files changed, %d insertions(+), %d deletions(-)\n", files, adds, dels);
+}
+
+/*
+ * Print a summary line of the form " <newdelete> mode <mode> <name>";
+ * the "mode <mode>" part is left out when "mode" is 0.
+ */
+static void show_file_mode_name(const char *newdelete, unsigned int mode, const char *name)
+{
+	if (!mode) {
+		printf(" %s %s\n", newdelete, name);
+		return;
+	}
+	printf(" %s mode %06o %s\n", newdelete, mode, name);
+}
+
+/*
+ * Print a " mode change <old> => <new>" line when both modes of "p"
+ * are set and differ; print nothing otherwise. The new name is
+ * appended to the line when "show_name" is non-zero.
+ */
+static void show_mode_change(struct patch *p, int show_name)
+{
+	if (!p->old_mode || !p->new_mode || p->old_mode == p->new_mode)
+		return;
+
+	if (!show_name) {
+		printf(" mode change %06o => %06o\n",
+		       p->old_mode, p->new_mode);
+		return;
+	}
+	printf(" mode change %06o => %06o %s\n",
+	       p->old_mode, p->new_mode, p->new_name);
+}
+
+/*
+ * Print the summary line for a rename or a copy, followed by the mode
+ * change, if any. Leading directory components that the old and the
+ * new name have in common are printed once, with the differing rest
+ * shown as "{old => new}".
+ */
+static void show_rename_copy(struct patch *p)
+{
+	const char *verb = p->is_rename ? "rename" : "copy";
+	const char *old_tail = p->old_name;
+	const char *new_tail = p->new_name;
+
+	/* Strip identical leading "dir/" components, one at a time */
+	for (;;) {
+		const char *old_sep = strchr(old_tail, '/');
+		const char *new_sep = strchr(new_tail, '/');
+
+		if (!old_sep || !new_sep)
+			break;
+		if (old_sep - old_tail != new_sep - new_tail)
+			break;
+		if (memcmp(old_tail, new_tail, new_sep - new_tail))
+			break;
+		old_tail = old_sep + 1;
+		new_tail = new_sep + 1;
+	}
+
+	/*
+	 * p->old_name up to old_tail is the common prefix; old_tail and
+	 * new_tail are the parts that differ.
+	 */
+	if (old_tail == p->old_name)
+		printf(" %s %s => %s (%d%%)\n", verb,
+		       p->old_name, p->new_name, p->score);
+	else
+		printf(" %s %.*s{%s => %s} (%d%%)\n", verb,
+		       (int)(old_tail - p->old_name), p->old_name,
+		       old_tail, new_tail, p->score);
+	show_mode_change(p, 0);
+}
+
+/*
+ * Print the summary for every patch of the list: creations, deletions,
+ * renames/copies, rewrites (patches that carry a score) and mode
+ * changes.
+ */
+static void summary_patch_list(struct patch *patch)
+{
+	struct patch *p;
+
+	for (p = patch; p; p = p->next) {
+		if (p->is_new) {
+			show_file_mode_name("create", p->new_mode, p->new_name);
+			continue;
+		}
+		if (p->is_delete) {
+			show_file_mode_name("delete", p->old_mode, p->old_name);
+			continue;
+		}
+		if (p->is_rename || p->is_copy) {
+			show_rename_copy(p);
+			continue;
+		}
+		if (!p->score) {
+			/* Plain modification: only a mode change is worth a line */
+			show_mode_change(p, 1);
+			continue;
+		}
+		printf(" rewrite %s (%d%%)\n",
+		       p->new_name, p->score);
+		show_mode_change(p, 0);
+	}
+}
+
+/*
+ * Fold one patch into the global maxima used to scale the diffstat:
+ * max_change (most lines changed in a single patch) and max_len
+ * (longest old or new file name).
+ */
+static void patch_stats(struct patch *patch)
+{
+	const char *names[2];
+	int changed = patch->lines_added + patch->lines_deleted;
+	int i;
+
+	if (max_change < changed)
+		max_change = changed;
+
+	names[0] = patch->old_name;
+	names[1] = patch->new_name;
+	for (i = 0; i < 2; i++) {
+		int len;
+
+		if (!names[i])
+			continue;
+		len = strlen(names[i]);
+		if (max_len < len)
+			max_len = len;
+	}
+}
+
+/*
+ * Remove the old path of "patch": from the index when write_index is
+ * set (dying if that fails), and then from the working directory. The
+ * result of unlink() is not checked.
+ */
+static void remove_file(struct patch *patch)
+{
+	const char *path = patch->old_name;
+
+	if (write_index && remove_file_from_cache(path) < 0)
+		die("unable to remove %s from index", path);
+	unlink(path);
+}
+
+/*
+ * Add the freshly written file "path" to the index; does nothing unless
+ * write_index is set. "buf"/"size" are the file contents, which are
+ * also written out as a blob object. Dies on any failure.
+ */
+static void add_index_file(const char *path, unsigned mode, void *buf, unsigned long size)
+{
+	int namelen = strlen(path);
+	struct cache_entry *ce;
+	struct stat st;
+	unsigned ce_size;
+
+	if (!write_index)
+		return;
+
+	/* Build a zeroed entry that carries the name and the mode */
+	ce_size = cache_entry_size(namelen);
+	ce = xmalloc(ce_size);
+	memset(ce, 0, ce_size);
+	memcpy(ce->name, path, namelen);
+	ce->ce_mode = create_ce_mode(mode);
+	ce->ce_flags = htons(namelen);
+
+	/* Take the stat information from the file we just created */
+	if (lstat(path, &st) < 0)
+		die("unable to stat newly created file %s", path);
+	fill_stat_cache_info(ce, &st);
+
+	if (write_sha1_file(buf, size, "blob", ce->sha1) < 0)
+		die("unable to create backing store for newly created file %s", path);
+	if (add_cache_entry(ce, ADD_CACHE_OK_TO_ADD) < 0)
+		die("unable to add cache entry for %s", path);
+}
+
+/*
+ * Create the leading directories of "path", outermost first. A
+ * directory that exists already is fine; any other mkdir() failure
+ * stops the walk silently.
+ */
+static void create_subdirectories(const char *path)
+{
+	char *prefix = xmalloc(strlen(path) + 1);
+	const char *slash;
+
+	/* Start looking after the first character, so a leading '/' is never cut at */
+	for (slash = strchr(path + 1, '/'); slash; slash = strchr(slash + 1, '/')) {
+		int n = slash - path;
+
+		memcpy(prefix, path, n);
+		prefix[n] = 0;
+		if (mkdir(prefix, 0777) < 0 && errno != EEXIST)
+			break;
+	}
+	free(prefix);
+}
+
+/*
+ * Try to create "path" with the given contents.
+ *
+ * For a symlink mode, "buf" is the link target and the result of
+ * symlink() is returned. Otherwise the file is created exclusively
+ * (executable if "mode" has the owner-execute bit) and "size" bytes of
+ * "buf" are written to it. Returns 0 on success and -1 if the file
+ * cannot be created; dies if writing or closing fails.
+ */
+static int try_create_file(const char *path, unsigned int mode, const char *buf, unsigned long size)
+{
+	const char *cursor = buf;
+	unsigned long remaining = size;
+	int fd;
+
+	if (S_ISLNK(mode))
+		return symlink(buf, path);
+
+	fd = open(path, O_CREAT | O_EXCL | O_WRONLY | O_TRUNC, (mode & 0100) ? 0777 : 0666);
+	if (fd < 0)
+		return -1;
+
+	while (remaining) {
+		int written = write(fd, cursor, remaining);
+
+		if (written > 0) {
+			cursor += written;
+			remaining -= written;
+			continue;
+		}
+		if (!written)
+			die("out of space writing file %s", path);
+		/* written < 0: retry on a transient error, give up on anything else */
+		if (errno != EINTR && errno != EAGAIN)
+			die("writing file %s: %s", path, strerror(errno));
+	}
+
+	if (close(fd) < 0)
+		die("closing file %s: %s", path, strerror(errno));
+	return 0;
+}
+
+/*
+ * Create "path" with the given mode and contents, or die trying.
+ *
+ * We optimistically assume that the directories exist,
+ * which is true 99% of the time anyway. If they don't,
+ * we create them and try again.
+ *
+ * If the path itself is in the way (EEXIST), the data is written to a
+ * temporary "<path>~<nr>" file which is then renamed over it.
+ */
+static void create_one_file(const char *path, unsigned mode, const char *buf, unsigned long size)
+{
+	if (!try_create_file(path, mode, buf, size))
+		return;
+
+	if (errno == ENOENT) {
+		create_subdirectories(path);
+		if (!try_create_file(path, mode, buf, size))
+			return;
+	}
+
+	if (errno == EEXIST) {
+		unsigned int nr = getpid();
+
+		for (;;) {
+			const char *newpath;
+			newpath = mkpath("%s~%u", path, nr);
+			if (!try_create_file(newpath, mode, buf, size)) {
+				if (!rename(newpath, path))
+					return;
+				unlink(newpath);
+				break;
+			}
+			if (errno != EEXIST)
+				break;
+			/*
+			 * That temporary name is taken as well. Move on to
+			 * the next number; retrying the same name would
+			 * never terminate.
+			 */
+			++nr;
+		}
+	}
+	die("unable to write file %s mode %o", path, mode);
+}
+
+/*
+ * Write the result of "patch" to its new path and add it to the index
+ * (add_index_file() decides whether the index is touched at all). A
+ * patch without a new mode gets a regular 0644 file.
+ */
+static void create_file(struct patch *patch)
+{
+	unsigned mode = patch->new_mode;
+
+	if (mode == 0)
+		mode = S_IFREG | 0644;
+
+	create_one_file(patch->new_name, mode, patch->result, patch->resultsize);
+	add_index_file(patch->new_name, mode, patch->result, patch->resultsize);
+}
+
+/*
+ * Write out the result of one patch: a deletion only removes the old
+ * file, a creation or copy only writes the new one, and everything else
+ * (rename or modification) removes the old file and then writes the
+ * new one.
+ */
+static void write_out_one_result(struct patch *patch)
+{
+	if (patch->is_delete > 0) {
+		remove_file(patch);
+		return;
+	}
+
+	/* Only a creation or a copy leaves the old path alone */
+	if (!(patch->is_new > 0 || patch->is_copy))
+		remove_file(patch);
+	create_file(patch);
+}
+
+/*
+ * Write out the results of all patches in "list". Dies with
+ * "No changes" if the list is empty and no patch was skipped either.
+ */
+static void write_out_results(struct patch *list, int skipped_patch)
+{
+	struct patch *p;
+
+	if (!(list || skipped_patch))
+		die("No changes");
+
+	for (p = list; p; p = p->next)
+		write_out_one_result(p);
+}
+
+/* Handed to hold_index_file_for_update() and commit_index_file() by apply_patch() */
+static struct cache_file cache_file;
+
+/* List of "--exclude=" arguments; main() pushes each new one at the head */
+static struct excludes {
+ struct excludes *next;
+ const char *path; /* fnmatch() pattern that use_patch() tests patch names against */
+} *excludes;
+
+/*
+ * Decide whether patch "p" should be used: returns 0 if its new name
+ * (or its old name, when there is no new one) matches any of the
+ * exclude patterns, and 1 otherwise.
+ */
+static int use_patch(struct patch *p)
+{
+	const char *pathname = p->new_name ? p->new_name : p->old_name;
+	struct excludes *x;
+
+	for (x = excludes; x; x = x->next) {
+		if (!fnmatch(x->path, pathname, 0))
+			return 0;
+	}
+	return 1;
+}
+
+/*
+ * Read a complete patch file from "fd" and process it according to the
+ * global option flags (check, apply, check_index, show_files, diffstat,
+ * summary).
+ *
+ * The input is first split into a list of patches; patches whose path
+ * matches an exclude pattern are dropped and only counted. The list is
+ * then checked, written out, and reported on, depending on the flags.
+ * Returns -1 if the input cannot be read and 0 otherwise; a failed
+ * check exits the program.
+ */
+static int apply_patch(int fd)
+{
+ int newfd;
+ unsigned long offset, size;
+ char *buffer = read_patch_file(fd, &size);
+ struct patch *list = NULL, **listp = &list;
+ int skipped_patch = 0;
+
+ if (!buffer)
+ return -1;
+ offset = 0;
+ while (size > 0) {
+ struct patch *patch;
+ int nr;
+
+ patch = xmalloc(sizeof(*patch));
+ memset(patch, 0, sizeof(*patch));
+ nr = parse_chunk(buffer + offset, size, patch);
+ /* No further patch header in the rest of the input */
+ if (nr < 0)
+ break;
+ if (use_patch(patch)) {
+ patch_stats(patch);
+ *listp = patch;
+ listp = &patch->next;
+ } else {
+ /* perhaps free it a bit better? */
+ free(patch);
+ skipped_patch++;
+ }
+ offset += nr;
+ size -= nr;
+ }
+
+ newfd = -1;
+ /* The index is rewritten only when we both check against it and really apply */
+ write_index = check_index && apply;
+ if (write_index)
+ newfd = hold_index_file_for_update(&cache_file, get_index_file());
+ if (check_index) {
+ if (read_cache() < 0)
+ die("unable to read index file");
+ }
+
+ if ((check || apply) && check_patch_list(list) < 0)
+ exit(1);
+
+ if (apply)
+ write_out_results(list, skipped_patch);
+
+ if (write_index) {
+ if (write_cache(newfd, active_cache, active_nr) ||
+ commit_index_file(&cache_file))
+ die("Unable to write new cachefile");
+ }
+
+ if (show_files)
+ show_file_list(list);
+
+ if (diffstat)
+ stat_patch_list(list);
+
+ if (summary)
+ summary_patch_list(list);
+
+ free(buffer);
+ return 0;
+}
+
+/*
+ * Entry point: process the arguments from left to right. Options set
+ * global flags; every other argument is opened as a patch file (or "-"
+ * for standard input) and processed on the spot, so an option only
+ * affects the patch files that come after it. If no patch file was
+ * named at all, the patch is read from standard input.
+ */
+int main(int argc, char **argv)
+{
+ int i;
+ int read_stdin = 1;
+
+ for (i = 1; i < argc; i++) {
+ const char *arg = argv[i];
+ int fd;
+
+ if (!strcmp(arg, "-")) {
+ apply_patch(0);
+ read_stdin = 0;
+ continue;
+ }
+ if (!strncmp(arg, "--exclude=", 10)) {
+ /* Push the pattern at the head of the exclude list */
+ struct excludes *x = xmalloc(sizeof(*x));
+ x->path = arg + 10;
+ x->next = excludes;
+ excludes = x;
+ continue;
+ }
+ /* NEEDSWORK: this does not do anything at this moment. */
+ if (!strcmp(arg, "--no-merge")) {
+ merge_patch = 0;
+ continue;
+ }
+ /* --stat, --summary and --check each turn off applying; --apply turns it back on */
+ if (!strcmp(arg, "--stat")) {
+ apply = 0;
+ diffstat = 1;
+ continue;
+ }
+ if (!strcmp(arg, "--summary")) {
+ apply = 0;
+ summary = 1;
+ continue;
+ }
+ if (!strcmp(arg, "--check")) {
+ apply = 0;
+ check = 1;
+ continue;
+ }
+ if (!strcmp(arg, "--index")) {
+ check_index = 1;
+ continue;
+ }
+ if (!strcmp(arg, "--apply")) {
+ apply = 1;
+ continue;
+ }
+ if (!strcmp(arg, "--show-files")) {
+ show_files = 1;
+ continue;
+ }
+ /* Anything else is taken to be the name of a patch file */
+ fd = open(arg, O_RDONLY);
+ if (fd < 0)
+ usage(apply_usage);
+ read_stdin = 0;
+ apply_patch(fd);
+ close(fd);
+ }
+ if (read_stdin)
+ apply_patch(0);
+ return 0;
+}
diff --git a/blob.c b/blob.c
new file mode 100644
index 0000000..ea52ad5
--- /dev/null
+++ b/blob.c
@@ -0,0 +1,52 @@
+#include "blob.h"
+#include "cache.h"
+#include <stdlib.h>
+
+/* Type tag of blob objects; lookup_blob() compares object types against this pointer itself */
+const char *blob_type = "blob";
+
+/*
+ * Return the blob with the given sha1, creating a fresh, zeroed one
+ * (registered through created_object()) if lookup_object() does not
+ * know the object yet. An existing object without a type becomes a
+ * blob. Returns NULL, after reporting an error, if the object exists
+ * but has a different type.
+ */
+struct blob *lookup_blob(const unsigned char *sha1)
+{
+	struct object *obj = lookup_object(sha1);
+	struct blob *blob;
+
+	if (obj) {
+		if (!obj->type)
+			obj->type = blob_type;
+		if (obj->type == blob_type)
+			return (struct blob *) obj;
+		error("Object %s is a %s, not a blob",
+		      sha1_to_hex(sha1), obj->type);
+		return NULL;
+	}
+
+	blob = xmalloc(sizeof(struct blob));
+	memset(blob, 0, sizeof(struct blob));
+	created_object(sha1, &blob->object);
+	blob->object.type = blob_type;
+	return blob;
+}
+
+/*
+ * Mark "item" as parsed. The contents ("buffer", "size") are not
+ * looked at. Always returns 0.
+ */
+int parse_blob_buffer(struct blob *item, void *buffer, unsigned long size)
+{
+	(void)buffer;
+	(void)size;
+	item->object.parsed = 1;
+	return 0;
+}
+
+/*
+ * Make sure "item" has been parsed: read the object from the object
+ * store, check that it really is a blob and hand it to
+ * parse_blob_buffer(). Does nothing if the blob is parsed already.
+ *
+ * Returns 0 on success and the result of error() if the object cannot
+ * be read or is not a blob.
+ */
+int parse_blob(struct blob *item)
+{
+	char type[20];
+	void *buffer;
+	unsigned long size;
+	int ret;
+
+	if (item->object.parsed)
+		return 0;
+	buffer = read_sha1_file(item->object.sha1, type, &size);
+	if (!buffer)
+		return error("Could not read %s",
+			     sha1_to_hex(item->object.sha1));
+	if (strcmp(type, blob_type)) {
+		/* The data was read successfully, so it has to be released here too */
+		free(buffer);
+		return error("Object %s not a blob",
+			     sha1_to_hex(item->object.sha1));
+	}
+	ret = parse_blob_buffer(item, buffer, size);
+	free(buffer);
+	return ret;
+}
diff --git a/blob.h b/blob.h
new file mode 100644
index 0000000..ea5d9e9
--- /dev/null
+++ b/blob.h
@@ -0,0 +1,18 @@
+#ifndef BLOB_H
+#define BLOB_H
+
+#include "object.h"
+
+/* Type tag of blob objects (the string "blob", defined in blob.c) */
+extern const char *blob_type;
+
+/* A blob object: nothing but the generic object header */
+struct blob {
+ struct object object;
+};
+
+/*
+ * Look up the blob with the given sha1, creating it if it is not known
+ * yet. Returns NULL if the object exists but is not a blob.
+ */
+struct blob *lookup_blob(const unsigned char *sha1);
+
+/* Mark "item" as parsed; the buffer contents are not examined. Returns 0. */
+int parse_blob_buffer(struct blob *item, void *buffer, unsigned long size);
+
+/*
+ * Read the object behind "item" and parse it, unless that has been
+ * done already. Returns 0 on success, an error() result otherwise.
+ */
+int parse_blob(struct blob *item);
+
+#endif /* BLOB_H */
diff --git a/build-rev-cache.c b/build-rev-cache.c
new file mode 100644
index 0000000..948898b
--- /dev/null
+++ b/build-rev-cache.c
@@ -0,0 +1,56 @@
+#include "refs.h"
+#include "cache.h"
+#include "commit.h"
+#include "rev-cache.h"
+
+/*
+ * Read head names from standard input, one hexadecimal object name per
+ * line, and record each one that resolves to a commit in the rev-cache.
+ * Lines that are not object names and objects that are not commits are
+ * reported and skipped. With "verbose" set, stderr is handed to
+ * record_rev_cache(); otherwise it gets NULL.
+ */
+static void process_head_list(int verbose)
+{
+	char buf[512];
+
+	for (;;) {
+		unsigned char sha1[20];
+		struct commit *commit;
+
+		if (!fgets(buf, sizeof(buf), stdin))
+			break;
+
+		if (get_sha1_hex(buf, sha1)) {
+			error("ignoring: %s", buf);
+			continue;
+		}
+		commit = lookup_commit_reference(sha1);
+		if (!commit) {
+			error("not a commit: %s", sha1_to_hex(sha1));
+			continue;
+		}
+		record_rev_cache(commit->object.sha1, verbose ? stderr : NULL);
+	}
+}
+
+
+/* Usage message; main() accepts "-v" as its only option */
+static const char *build_rev_cache_usage =
+"git-build-rev-cache [-v] <rev-cache-file> < list-of-heads";
+
+/*
+ * Entry point: "[-v] <rev-cache-file>". Reads the existing rev-cache
+ * file, records the heads listed on standard input, and writes the
+ * rev-cache back to the same path. Any other option, or a wrong number
+ * of arguments, prints the usage message.
+ */
+int main(int ac, char **av)
+{
+	const char *path;
+	int verbose = 0;
+	int i = 1;
+
+	/* Leading arguments that start with '-' are options */
+	for (; i < ac && av[i][0] == '-'; i++) {
+		if (strcmp(av[i], "-v"))
+			usage(build_rev_cache_usage);
+		verbose = 1;
+	}
+
+	/* Exactly one argument, the cache file, must remain */
+	if (ac - i != 1)
+		usage(build_rev_cache_usage);
+
+	path = av[i];
+
+	/* read existing rev-cache */
+	read_rev_cache(path, NULL, 0);
+
+	process_head_list(verbose);
+
+	/* update the rev-cache database by appending newly found one to it */
+	write_rev_cache(path, path);
+	return 0;
+}
diff --git a/cache.h b/cache.h
new file mode 100644
index 0000000..d1bdb56
--- /dev/null
+++ b/cache.h
@@ -0,0 +1,314 @@
+#ifndef CACHE_H
+#define CACHE_H
+
+#include <unistd.h>
+#include <stdio.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+#include <errno.h>
+#include <limits.h>
+#include <sys/mman.h>
+#include <sys/param.h>
+#include <netinet/in.h>
+#include <sys/types.h>
+#include <dirent.h>
+
+#include SHA1_HEADER
+#include <zlib.h>
+
+#if ZLIB_VERNUM < 0x1200
+#define deflateBound(c,s) ((s) + (((s) + 7) >> 3) + (((s) + 63) >> 6) + 11)
+#endif
+
+#ifdef DT_UNKNOWN
+#define DTYPE(de) ((de)->d_type)
+#else
+#define DT_UNKNOWN 0
+#define DT_DIR 1
+#define DT_REG 2
+#define DT_LNK 3
+#define DTYPE(de) DT_UNKNOWN
+#endif
+
+#ifdef __GNUC__
+#define NORETURN __attribute__((__noreturn__))
+#else
+#define NORETURN
+#endif
+
+/*
+ * Intensive research over the course of many years has shown that
+ * port 9418 is totally unused by anything else. Or
+ *
+ * Your search - "port 9418" - did not match any documents.
+ *
+ * as www.google.com puts it.
+ */
+#define DEFAULT_GIT_PORT 9418
+
+/*
+ * Environment variables transition.
+ * We accept older names for now but warn.
+ */
+extern char *gitenv_bc(const char *);
+#define gitenv(e) (getenv(e) ? : gitenv_bc(e))
+
+/*
+ * Basic data structures for the directory cache
+ */
+
+#define CACHE_SIGNATURE 0x44495243 /* "DIRC" */
+struct cache_header {
+ unsigned int hdr_signature;
+ unsigned int hdr_version;
+ unsigned int hdr_entries;
+};
+
+/*
+ * The "cache_time" is just the low 32 bits of the
+ * time. It doesn't matter if it overflows - we only
+ * check it for equality in the 32 bits we save.
+ */
+struct cache_time {
+ unsigned int sec;
+ unsigned int nsec;
+};
+
+/*
+ * dev/ino/uid/gid/size are also just tracked to the low 32 bits
+ * Again - this is just a (very strong in practice) heuristic that
+ * the inode hasn't changed.
+ *
+ * We save the fields in big-endian order to allow using the
+ * index file over NFS transparently.
+ */
+struct cache_entry {
+ struct cache_time ce_ctime;
+ struct cache_time ce_mtime;
+ unsigned int ce_dev;
+ unsigned int ce_ino;
+ unsigned int ce_mode; /* as produced by create_ce_mode(), i.e. already passed through htonl() */
+ unsigned int ce_uid;
+ unsigned int ce_gid;
+ unsigned int ce_size;
+ unsigned char sha1[20];
+ /*
+ * Network byte order (built with htons(), read with ntohs()):
+ * CE_NAMEMASK bits hold the name length, CE_STAGEMASK bits the
+ * stage, and CE_UPDATE is a separate flag bit.
+ */
+ unsigned short ce_flags;
+ char name[0]; /* name bytes follow the struct; see cache_entry_size() for the total size */
+};
+
+#define CE_NAMEMASK (0x0fff)
+#define CE_STAGEMASK (0x3000)
+#define CE_UPDATE (0x4000)
+#define CE_STAGESHIFT 12
+
+#define create_ce_flags(len, stage) htons((len) | ((stage) << CE_STAGESHIFT))
+#define ce_namelen(ce) (CE_NAMEMASK & ntohs((ce)->ce_flags))
+#define ce_size(ce) cache_entry_size(ce_namelen(ce))
+#define ce_stage(ce) ((CE_STAGEMASK & ntohs((ce)->ce_flags)) >> CE_STAGESHIFT)
+
+#define ce_permissions(mode) (((mode) & 0100) ? 0755 : 0644)
+/*
+ * Reduce a file mode to the form kept in a cache entry, converted with
+ * htonl(): a symlink keeps only its type bits, anything else is
+ * recorded as a regular file with the permissions chosen by
+ * ce_permissions().
+ */
+static inline unsigned int create_ce_mode(unsigned int mode)
+{
+	unsigned int canon;
+
+	if (S_ISLNK(mode))
+		canon = S_IFLNK;
+	else
+		canon = S_IFREG | ce_permissions(mode);
+	return htonl(canon);
+}
+
+#define cache_entry_size(len) ((offsetof(struct cache_entry,name) + (len) + 8) & ~7)
+
+extern struct cache_entry **active_cache;
+extern unsigned int active_nr, active_alloc, active_cache_changed;
+
+#define GIT_DIR_ENVIRONMENT "GIT_DIR"
+#define DEFAULT_GIT_DIR_ENVIRONMENT ".git"
+#define DB_ENVIRONMENT "GIT_OBJECT_DIRECTORY"
+#define INDEX_ENVIRONMENT "GIT_INDEX_FILE"
+
+extern char *get_object_directory(void);
+extern char *get_refs_directory(void);
+extern char *get_index_file(void);
+
+#define ALTERNATE_DB_ENVIRONMENT "GIT_ALTERNATE_OBJECT_DIRECTORIES"
+
+#define alloc_nr(x) (((x)+16)*3/2)
+
+/* Initialize and use the cache information */
+extern int read_cache(void);
+extern int write_cache(int newfd, struct cache_entry **cache, int entries);
+extern int cache_name_pos(const char *name, int namelen);
+#define ADD_CACHE_OK_TO_ADD 1 /* Ok to add */
+#define ADD_CACHE_OK_TO_REPLACE 2 /* Ok to replace file/directory */
+#define ADD_CACHE_SKIP_DFCHECK 4 /* Ok to skip DF conflict checks */
+extern int add_cache_entry(struct cache_entry *ce, int option);
+extern int remove_cache_entry_at(int pos);
+extern int remove_file_from_cache(char *path);
+extern int ce_same_name(struct cache_entry *a, struct cache_entry *b);
+extern int ce_match_stat(struct cache_entry *ce, struct stat *st);
+extern int ce_path_match(const struct cache_entry *ce, const char **pathspec);
+extern int index_fd(unsigned char *sha1, int fd, struct stat *st, int write_object, const char *type);
+extern void fill_stat_cache_info(struct cache_entry *ce, struct stat *st);
+
+struct cache_file {
+ struct cache_file *next;
+ char lockfile[PATH_MAX];
+};
+extern int hold_index_file_for_update(struct cache_file *, const char *path);
+extern int commit_index_file(struct cache_file *);
+extern void rollback_index_file(struct cache_file *);
+
+#define MTIME_CHANGED 0x0001
+#define CTIME_CHANGED 0x0002
+#define OWNER_CHANGED 0x0004
+#define MODE_CHANGED 0x0008
+#define INODE_CHANGED 0x0010
+#define DATA_CHANGED 0x0020
+#define TYPE_CHANGED 0x0040
+
+/* Return a statically allocated filename matching the sha1 signature */
+extern char *mkpath(const char *fmt, ...);
+extern char *git_path(const char *fmt, ...);
+extern char *sha1_file_name(const unsigned char *sha1);
+
+int safe_create_leading_directories(char *path);
+
+/* Read and unpack a sha1 file into memory, write memory to a sha1 file */
+extern int unpack_sha1_header(z_stream *stream, void *map, unsigned long mapsize, void *buffer, unsigned long size);
+extern int parse_sha1_header(char *hdr, char *type, unsigned long *sizep);
+extern int sha1_object_info(const unsigned char *, char *, unsigned long *);
+extern void * unpack_sha1_file(void *map, unsigned long mapsize, char *type, unsigned long *size);
+extern void * read_sha1_file(const unsigned char *sha1, char *type, unsigned long *size);
+extern int write_sha1_file(void *buf, unsigned long len, const char *type, unsigned char *return_sha1);
+extern char *write_sha1_file_prepare(void *buf,
+ unsigned long len,
+ const char *type,
+ unsigned char *sha1,
+ unsigned char *hdr,
+ int *hdrlen);
+
+extern int check_sha1_signature(const unsigned char *sha1, void *buf, unsigned long size, const char *type);
+
+/* Read a tree into the cache */
+extern int read_tree(void *buffer, unsigned long size, int stage, const char **paths);
+
+extern int write_sha1_from_fd(const unsigned char *sha1, int fd);
+extern int write_sha1_to_fd(int fd, const unsigned char *sha1);
+
+extern int has_sha1_pack(const unsigned char *sha1);
+extern int has_sha1_file(const unsigned char *sha1);
+
+/* Convert to/from hex/sha1 representation */
+extern int get_sha1(const char *str, unsigned char *sha1);
+extern int get_sha1_hex(const char *hex, unsigned char *sha1);
+extern char *sha1_to_hex(const unsigned char *sha1); /* static buffer result! */
+
+/* General helper functions */
+extern void usage(const char *err) NORETURN;
+extern void die(const char *err, ...) NORETURN;
+extern int error(const char *err, ...);
+
+extern int base_name_compare(const char *name1, int len1, int mode1, const char *name2, int len2, int mode2);
+extern int cache_name_compare(const char *name1, int len1, const char *name2, int len2);
+
+extern void *read_object_with_reference(const unsigned char *sha1,
+ const char *required_type,
+ unsigned long *size,
+ unsigned char *sha1_ret);
+
+const char *show_date(unsigned long time, int timezone);
+void parse_date(const char *date, char *buf, int bufsize);
+void datestamp(char *buf, int bufsize);
+
+extern int setup_ident(void);
+extern char *get_ident(const char *name, const char *email, const char *date_str);
+extern char *git_author_info(void);
+extern char *git_committer_info(void);
+
+/*
+ * malloc() wrapper that never returns NULL: it die()s when memory
+ * cannot be obtained.
+ *
+ * A zero-byte request may legitimately yield NULL on some C libraries.
+ * That case is retried as a one-byte request so that it is not
+ * misreported as running out of memory.
+ */
+static inline void *xmalloc(size_t size)
+{
+	void *ret = malloc(size);
+	if (!ret && !size)
+		ret = malloc(1);
+	if (!ret)
+		die("Out of memory, malloc failed");
+	return ret;
+}
+
+/*
+ * realloc() wrapper that never returns NULL: it die()s when memory
+ * cannot be obtained.
+ *
+ * realloc(ptr, 0) is allowed to release ptr and return NULL, which is
+ * indistinguishable from a failure.  A zero-byte request is therefore
+ * promoted to one byte so the result is always a live allocation.
+ */
+static inline void *xrealloc(void *ptr, size_t size)
+{
+	void *ret = realloc(ptr, size ? size : 1);
+	if (!ret)
+		die("Out of memory, realloc failed");
+	return ret;
+}
+
+/*
+ * calloc() wrapper that never returns NULL: it die()s when memory
+ * cannot be obtained.
+ *
+ * A request for zero bytes in total may legitimately yield NULL on
+ * some C libraries.  That case is retried as a one-byte request so
+ * that it is not misreported as running out of memory.
+ */
+static inline void *xcalloc(size_t nmemb, size_t size)
+{
+	void *ret = calloc(nmemb, size);
+	if (!ret && (!nmemb || !size))
+		ret = calloc(1, 1);
+	if (!ret)
+		die("Out of memory, calloc failed");
+	return ret;
+}
+
+struct checkout {
+ const char *base_dir;
+ int base_dir_len;
+ unsigned force:1,
+ quiet:1,
+ not_new:1,
+ refresh_cache:1;
+};
+
+extern int checkout_entry(struct cache_entry *ce, struct checkout *state);
+
+extern struct alternate_object_database {
+ char *base;
+ char *name;
+} *alt_odb;
+extern void prepare_alt_odb(void);
+
+extern struct packed_git {
+ struct packed_git *next;
+ unsigned long index_size;
+ unsigned long pack_size;
+ unsigned int *index_base;
+ void *pack_base;
+ unsigned int pack_last_used;
+ unsigned int pack_use_cnt;
+ char pack_name[0]; /* something like ".git/objects/pack/xxxxx.pack" */
+} *packed_git;
+
+struct pack_entry {
+ unsigned int offset;
+ unsigned char sha1[20];
+ struct packed_git *p;
+};
+
+struct ref {
+ struct ref *next;
+ unsigned char old_sha1[20];
+ unsigned char new_sha1[20];
+ char name[0];
+};
+
+extern int git_connect(int fd[2], char *url, const char *prog);
+extern int finish_connect(pid_t pid);
+extern int path_match(const char *path, int nr, char **match);
+extern int get_ack(int fd, unsigned char *result_sha1);
+extern struct ref **get_remote_heads(int in, struct ref **list, int nr_match, char **match);
+
+extern void prepare_packed_git(void);
+extern int use_packed_git(struct packed_git *);
+extern void unuse_packed_git(struct packed_git *);
+extern struct packed_git *add_packed_git(char *, int);
+extern int num_packed_objects(const struct packed_git *p);
+extern int nth_packed_object_sha1(const struct packed_git *, int, unsigned char*);
+extern int find_pack_entry_one(const unsigned char *, struct pack_entry *, struct packed_git *);
+extern void *unpack_entry_gently(struct pack_entry *, char *, unsigned long *);
+extern void packed_object_info_detail(struct pack_entry *, char *, unsigned long *, unsigned long *, int *, unsigned char *);
+
+/* Dumb servers support */
+extern int update_server_info(int);
+
+#endif /* CACHE_H */
diff --git a/cat-file.c b/cat-file.c
new file mode 100644
index 0000000..ef58970
--- /dev/null
+++ b/cat-file.c
@@ -0,0 +1,55 @@
+/*
+ * GIT - The information manager from hell
+ *
+ * Copyright (C) Linus Torvalds, 2005
+ */
+#include "cache.h"
+
+/*
+ * git-cat-file [-t | -s | <type>] <sha1>
+ *
+ * With -t or -s, print the object's type or size as reported by
+ * sha1_object_info().  Otherwise read the object via
+ * read_object_with_reference(), passing argv[1] as the required type,
+ * and copy its contents to stdout.
+ *
+ * Returns 0 on success; every failure goes through usage() or die().
+ */
+int main(int argc, char **argv)
+{
+	unsigned char sha1[20];
+	char type[20];
+	void *buf;
+	const char *out;
+	unsigned long size;
+
+	if (argc != 3 || get_sha1(argv[2], sha1))
+		usage("git-cat-file [-t | -s | <type>] <sha1>");
+
+	if (!strcmp("-t", argv[1]) || !strcmp("-s", argv[1])) {
+		/* The size is only requested when it is going to be printed. */
+		if (!sha1_object_info(sha1, type,
+				      argv[1][1] == 's' ? &size : NULL)) {
+			switch (argv[1][1]) {
+			case 't':
+				printf("%s\n", type);
+				break;
+			case 's':
+				printf("%lu\n", size);
+				break;
+			}
+			return 0;
+		}
+		buf = NULL;
+	} else {
+		buf = read_object_with_reference(sha1, argv[1], &size, NULL);
+	}
+
+	if (!buf)
+		die("git-cat-file %s: bad file", argv[2]);
+
+	/*
+	 * Walk the buffer through a char pointer: arithmetic on "void *"
+	 * is a compiler extension, not standard C.
+	 */
+	out = buf;
+	while (size > 0) {
+		ssize_t ret = write(1, out, size);
+		if (ret < 0) {
+			/* Interrupted or temporarily unable to write: retry */
+			if (errno == EAGAIN || errno == EINTR)
+				continue;
+			/* Ignore epipe */
+			if (errno == EPIPE)
+				break;
+			die("git-cat-file: %s", strerror(errno));
+		} else if (!ret) {
+			die("git-cat-file: disk full?");
+		}
+		size -= ret;
+		out += ret;
+	}
+	return 0;
+}
diff --git a/check-files.c b/check-files.c
new file mode 100644
index 0000000..6fd69e7
--- /dev/null
+++ b/check-files.c
@@ -0,0 +1,47 @@
+/*
+ * check-files.c
+ *
+ * Check that a set of files are up-to-date in the filesystem or
+ * do not exist. Used to verify a patch target before doing a patch.
+ *
+ * Copyright (C) 2005 Linus Torvalds
+ */
+#include "cache.h"
+
+/*
+ * Verify that `path` is safe to patch.  A path that does not exist is
+ * accepted silently.  A path that exists must be present in the cache
+ * and must match its cache entry according to ce_match_stat();
+ * anything else is fatal.
+ */
+static void check_file(const char *path)
+{
+	struct cache_entry *ce;
+	struct stat st;
+	int pos, changed;
+	int fd = open(path, O_RDONLY);
+
+	/* Nonexistent is fine */
+	if (fd < 0) {
+		if (errno != ENOENT)
+			die("%s: %s", path, strerror(errno));
+		return;
+	}
+	/*
+	 * The descriptor was only needed to prove the path can be opened.
+	 * Release it right away, otherwise one descriptor is leaked for
+	 * every file named on the command line.
+	 */
+	close(fd);
+
+	/* Exists but is not in the cache is not fine */
+	pos = cache_name_pos(path, strlen(path));
+	if (pos < 0)
+		die("preparing to update existing file '%s' not in cache", path);
+	ce = active_cache[pos];
+
+	if (lstat(path, &st) < 0)
+		die("lstat(%s): %s", path, strerror(errno));
+
+	changed = ce_match_stat(ce, &st);
+	if (changed)
+		die("preparing to update file '%s' not uptodate in cache", path);
+}
+
+/* Load the cache, then run check_file() on every command-line argument. */
+int main(int argc, char **argv)
+{
+	int i = 1;
+
+	read_cache();
+	while (i < argc)
+		check_file(argv[i++]);
+	return 0;
+}
diff --git a/checkout-cache.c b/checkout-cache.c
new file mode 100644
index 0000000..d6c4ff1
--- /dev/null
+++ b/checkout-cache.c
@@ -0,0 +1,150 @@
+/*
+ * Check-out files from the "current cache directory"
+ *
+ * Copyright (C) 2005 Linus Torvalds
+ *
+ * Careful: order of argument flags does matter. For example,
+ *
+ * git-checkout-cache -a -f file.c
+ *
+ * Will first check out all files listed in the cache (but not
+ * overwrite any old ones), and then force-checkout "file.c" a
+ * second time (ie that one _will_ overwrite any old contents
+ * with the same filename).
+ *
+ * Also, just doing "git-checkout-cache" does nothing. You probably
+ * meant "git-checkout-cache -a". And if you want to force it, you
+ * want "git-checkout-cache -f -a".
+ *
+ * Intuitiveness is not the goal here. Repeatability is. The
+ * reason for the "no arguments means no work" thing is that
+ * from scripts you are supposed to be able to do things like
+ *
+ * find . -name '*.h' -print0 | xargs -0 git-checkout-cache -f --
+ *
+ * which will force all existing *.h files to be replaced with
+ * their cached copies. If an empty command line implied "all",
+ * then this would force-refresh everything in the cache, which
+ * was not the point.
+ *
+ * Oh, and the "--" is just a good idea when you know the rest
+ * will be filenames. Just so that you wouldn't have a filename
+ * of "-a" causing problems (not possible in the above example,
+ * but get used to it in scripting!).
+ */
+#include "cache.h"
+
+/*
+ * Checkout options shared by the whole program.  Only base_dir needs an
+ * explicit value; base_dir_len and the force, quiet, not_new and
+ * refresh_cache bits all start out as zero.
+ */
+static struct checkout state = {
+	.base_dir = "",
+};
+
+/*
+ * Check out the single cache entry called `name`.
+ *
+ * Returns the result of checkout_entry() when the name is found, or -1
+ * when cache_name_pos() reports a miss.  Unless state.quiet is set, a
+ * miss is explained on stderr: "unmerged" if the entry at the position
+ * derived from the negative result carries the same name, otherwise
+ * "not in the cache".
+ */
+static int checkout_file(const char *name)
+{
+	int pos = cache_name_pos(name, strlen(name));
+	const char *why;
+
+	if (pos >= 0)
+		return checkout_entry(active_cache[pos], &state);
+	if (state.quiet)
+		return -1;
+
+	pos = -pos - 1;
+	if (pos < active_nr && !strcmp(active_cache[pos]->name, name))
+		why = "unmerged";
+	else
+		why = "not in the cache";
+	fprintf(stderr, "git-checkout-cache: %s is %s.\n", name, why);
+	return -1;
+}
+
+/*
+ * Check out every cache entry whose stage is zero.  Stops at the first
+ * entry for which checkout_entry() reports a negative result and
+ * returns -1; returns 0 once all entries have been processed.
+ */
+static int checkout_all(void)
+{
+	int i = 0;
+
+	while (i < active_nr) {
+		struct cache_entry *ce = active_cache[i++];
+
+		if (!ce_stage(ce) && checkout_entry(ce, &state) < 0)
+			return -1;
+	}
+	return 0;
+}
+
+static const char *checkout_cache_usage =
+"git-checkout-cache [-u] [-q] [-a] [-f] [-n] [--prefix=<string>] [--] <file>...";
+
+/*
+ * git-checkout-cache entry point.
+ *
+ * Arguments are processed strictly left to right, so a flag only
+ * affects the files (or "-a") that come after it.  After "--" every
+ * argument is treated as a file name.  With "-u" the index file is
+ * locked and rewritten at the end, unless a non-empty --prefix is in
+ * effect by the time a file is checked out, in which case the lock is
+ * rolled back and the cache is left alone.
+ */
+int main(int argc, char **argv)
+{
+	int i, force_filename = 0;
+	struct cache_file cache_file;
+	int newfd = -1;
+
+	if (read_cache() < 0) {
+		die("invalid cache");
+	}
+
+	for (i = 1; i < argc; i++) {
+		const char *arg = argv[i];
+		if (!force_filename) {
+			if (!strcmp(arg, "-a")) {
+				checkout_all();
+				continue;
+			}
+			if (!strcmp(arg, "--")) {
+				force_filename = 1;
+				continue;
+			}
+			if (!strcmp(arg, "-f")) {
+				state.force = 1;
+				continue;
+			}
+			if (!strcmp(arg, "-q")) {
+				state.quiet = 1;
+				continue;
+			}
+			if (!strcmp(arg, "-n")) {
+				state.not_new = 1;
+				continue;
+			}
+			if (!strcmp(arg, "-u")) {
+				state.refresh_cache = 1;
+				if (newfd < 0)
+					newfd = hold_index_file_for_update
+						(&cache_file,
+						 get_index_file());
+				if (newfd < 0)
+					die("cannot open index.lock file.");
+				continue;
+			}
+			/*
+			 * strncmp, not memcmp: the argument may be shorter
+			 * than the nine bytes being compared, and memcmp
+			 * would read past its terminating NUL.
+			 */
+			if (!strncmp(arg, "--prefix=", 9)) {
+				state.base_dir = arg+9;
+				state.base_dir_len = strlen(state.base_dir);
+				continue;
+			}
+			if (arg[0] == '-')
+				usage(checkout_cache_usage);
+		}
+		if (state.base_dir_len) {
+			/* when --prefix is specified we do not
+			 * want to update cache.
+			 */
+			if (state.refresh_cache) {
+				close(newfd); newfd = -1;
+				rollback_index_file(&cache_file);
+			}
+			state.refresh_cache = 0;
+		}
+		checkout_file(arg);
+	}
+
+	if (0 <= newfd &&
+	    (write_cache(newfd, active_cache, active_nr) ||
+	     commit_index_file(&cache_file)))
+		die("Unable to write new cachefile");
+	return 0;
+}
diff --git a/clone-pack.c b/clone-pack.c
new file mode 100644
index 0000000..e9c20de
--- /dev/null
+++ b/clone-pack.c
@@ -0,0 +1,182 @@
+#include "cache.h"
+#include "refs.h"
+#include "pkt-line.h"
+#include <sys/wait.h>
+
+static int quiet;
+static const char clone_pack_usage[] = "git-clone-pack [-q] [--exec=<git-upload-pack>] [<host>:]<directory> [<heads>]*";
+static const char *exec = "git-upload-pack";
+
+/*
+ * Send a "want" line for every ref in the list on fd[1], flush, and
+ * then announce "done" without offering any objects.  A non-zero
+ * result from get_ack() on fd[0] is unexpected here and is reported
+ * through error().
+ */
+static void clone_handshake(int fd[2], struct ref *ref)
+{
+	unsigned char sha1[20];
+	struct ref *r;
+
+	for (r = ref; r != NULL; r = r->next)
+		packet_write(fd[1], "want %s\n", sha1_to_hex(r->old_sha1));
+	packet_flush(fd[1]);
+
+	/* We don't have nuttin' */
+	packet_write(fd[1], "done\n");
+	if (get_ack(fd[0], sha1))
+		error("Huh! git-clone-pack got positive ack for %s", sha1_to_hex(sha1));
+}
+
+/* Non-zero when the ref is named exactly "refs/heads/master". */
+static int is_master(struct ref *ref)
+{
+	return strcmp(ref->name, "refs/heads/master") == 0;
+}
+
+/*
+ * Create the file for one ref under the git directory and store the
+ * ref's old_sha1 in it as 40 hex digits followed by a newline.  The
+ * file must not exist yet (O_EXCL).  Any failure is fatal.
+ */
+static void write_one_ref(struct ref *ref)
+{
+	/*
+	 * The ref name comes from the list of remote heads, and
+	 * git_path() takes a printf-style format, so the name must be
+	 * passed as an argument and never as the format itself.
+	 */
+	char *path = git_path("%s", ref->name);
+	int fd;
+	char *hex;
+
+	if (safe_create_leading_directories(path))
+		die("unable to create leading directory for %s", ref->name);
+	fd = open(path, O_CREAT | O_EXCL | O_WRONLY, 0666);
+	if (fd < 0)
+		die("unable to create ref %s", ref->name);
+	hex = sha1_to_hex(ref->old_sha1);
+	/* Replace the terminator after the 40 hex digits with the newline to write */
+	hex[40] = '\n';
+	if (write(fd, hex, 41) != 41)
+		die("unable to write ref %s", ref->name);
+	close(fd);
+}
+
+/*
+ * Write every ref in the list to disk and then decide what to do about
+ * HEAD.
+ *
+ * If the first list entry is named "HEAD" it is set aside rather than
+ * written with the others.  While writing the remaining refs, head_ptr
+ * tracks a ref whose sha1 equals HEAD's: the first such ref, replaced
+ * by master if master also matches.
+ */
+static void write_refs(struct ref *ref)
+{
+ struct ref *head = NULL, *head_ptr, *master_ref;
+ char *head_path;
+
+ if (!strcmp(ref->name, "HEAD")) {
+ head = ref;
+ ref = ref->next;
+ }
+ head_ptr = NULL;
+ master_ref = NULL;
+ while (ref) {
+ if (is_master(ref))
+ master_ref = ref;
+ if (head && !memcmp(ref->old_sha1, head->old_sha1, 20)) {
+ if (!head_ptr || ref == master_ref)
+ head_ptr = ref;
+ }
+ write_one_ref(ref);
+ ref = ref->next;
+ }
+ /* No HEAD entry in the list: nothing more to do */
+ if (!head)
+ return;
+
+ head_path = git_path("HEAD");
+ if (!head_ptr) {
+ /*
+ * If we had a master ref, and it wasn't HEAD, we need to undo the
+ * symlink, and write a standalone HEAD. Give a warning, because that's
+ * really really wrong.
+ */
+ if (master_ref) {
+ error("HEAD doesn't point to any refs! Making standalone HEAD");
+ unlink(head_path);
+ }
+ write_one_ref(head);
+ return;
+ }
+
+ /* We reset to the master branch if it's available */
+ /* (HEAD is left untouched whenever a master ref was seen) */
+ if (master_ref)
+ return;
+
+ /*
+ * Uhhuh. Other end didn't have master. We start HEAD off with
+ * the first branch with the same value.
+ */
+ unlink(head_path);
+ if (symlink(head_ptr->name, head_path) < 0)
+ die("unable to link HEAD to %s", head_ptr->name);
+}
+
+/*
+ * Drive one clone over an established connection: read the remote
+ * heads, request them all, and feed what arrives on fd[0] to a forked
+ * git-unpack-objects.  The refs are written only after that child has
+ * exited with status 0.
+ *
+ * Returns 0 on success; every failure path ends in die().
+ */
+static int clone_pack(int fd[2], int nr_match, char **match)
+{
+ struct ref *refs;
+ int status;
+ pid_t pid;
+
+ /* NOTE(review): refs is not initialised here; this relies on get_remote_heads() storing through its list argument - confirm */
+ get_remote_heads(fd[0], &refs, nr_match, match);
+ if (!refs) {
+ packet_flush(fd[1]);
+ die("no matching remote head");
+ }
+ clone_handshake(fd, refs);
+ pid = fork();
+ if (pid < 0)
+ die("git-clone-pack: unable to fork off git-unpack-objects");
+ if (!pid) {
+ /* Child: the connection's read side becomes stdin of git-unpack-objects */
+ dup2(fd[0], 0);
+ close(fd[0]);
+ close(fd[1]);
+ /* When not quiet the first NULL already terminates the argument list */
+ execlp("git-unpack-objects", "git-unpack-objects",
+ quiet ? "-q" : NULL, NULL);
+ die("git-unpack-objects exec failed");
+ }
+ /* Parent: both descriptors are closed here; only the child's exit status is awaited */
+ close(fd[0]);
+ close(fd[1]);
+ while (waitpid(pid, &status, 0) < 0) {
+ if (errno != EINTR)
+ die("waiting for git-unpack-objects: %s", strerror(errno));
+ }
+ if (WIFEXITED(status)) {
+ int code = WEXITSTATUS(status);
+ if (code)
+ die("git-unpack-objects died with error code %d", code);
+ write_refs(refs);
+ return 0;
+ }
+ if (WIFSIGNALED(status)) {
+ int sig = WTERMSIG(status);
+ die("git-unpack-objects died of signal %d", sig);
+ }
+ die("Sherlock Holmes! git-unpack-objects died of unnatural causes %d!", status);
+}
+
+/*
+ * git-clone-pack [-q] [--exec=<git-upload-pack>] [<host>:]<directory> [<heads>]*
+ *
+ * Options are accepted only before the destination.  The first
+ * argument that does not start with '-' is the destination, and
+ * everything after it is the list of heads to match.
+ */
+int main(int argc, char **argv)
+{
+	int i, ret;
+	int nr_heads = 0;
+	char *dest = NULL;
+	char **heads = NULL;
+	int fd[2];
+	pid_t pid;
+
+	for (i = 1; i < argc; i++) {
+		char *arg = argv[i];
+
+		if (*arg != '-') {
+			dest = arg;
+			heads = argv + i + 1;
+			nr_heads = argc - i - 1;
+			break;
+		}
+		if (!strcmp("-q", arg))
+			quiet = 1;
+		else if (!strncmp("--exec=", arg, 7))
+			exec = arg + 7;
+		else
+			usage(clone_pack_usage);
+	}
+	if (!dest)
+		usage(clone_pack_usage);
+
+	pid = git_connect(fd, dest, exec);
+	if (pid < 0)
+		return 1;
+	ret = clone_pack(fd, nr_heads, heads);
+	close(fd[0]);
+	close(fd[1]);
+	finish_connect(pid);
+	return ret;
+}
diff --git a/commit-tree.c b/commit-tree.c
new file mode 100644
index 0000000..d545f62
--- /dev/null
+++ b/commit-tree.c
@@ -0,0 +1,131 @@
+/*
+ * GIT - The information manager from hell
+ *
+ * Copyright (C) Linus Torvalds, 2005
+ */
+#include "cache.h"
+
+#include <pwd.h>
+#include <time.h>
+#include <ctype.h>
+
+#define BLOCKING (1ul << 14)
+
+/*
+ * FIXME! Share the code with "write-tree.c"
+ */
+/* Start an empty buffer: BLOCKING bytes allocated, zero bytes used. */
+static void init_buffer(char **bufp, unsigned int *sizep)
+{
+	*bufp = xmalloc(BLOCKING);
+	*sizep = 0;
+}
+
+/*
+ * Append printf-style formatted text to the growing buffer *bufp,
+ * whose current length is *sizep.  The buffer is reallocated in 32kB
+ * steps as needed, and *bufp and *sizep are updated.  No terminating
+ * NUL is stored.
+ *
+ * Text is formatted into a 2kB stack buffer first.  When it does not
+ * fit, vsnprintf() reports the length it would have needed, and the
+ * text is formatted again into a heap buffer of that size.  Copying
+ * that many bytes out of the stack buffer would read past its end.
+ */
+static void add_buffer(char **bufp, unsigned int *sizep, const char *fmt, ...)
+{
+	char one_line[2048];
+	char *text = one_line;
+	va_list args;
+	int len;
+	unsigned long alloc, size, newsize;
+	char *buf;
+
+	va_start(args, fmt);
+	len = vsnprintf(one_line, sizeof(one_line), fmt, args);
+	va_end(args);
+	if (len < 0)
+		die("unable to format text for the commit buffer");
+	if ((size_t)len >= sizeof(one_line)) {
+		/* Truncated: redo the formatting with enough room */
+		text = xmalloc(len + 1);
+		va_start(args, fmt);
+		vsnprintf(text, len + 1, fmt, args);
+		va_end(args);
+	}
+
+	size = *sizep;
+	newsize = size + len;
+	/* Capacity is taken to be the used size rounded up to 32kB */
+	alloc = (size + 32767) & ~32767;
+	buf = *bufp;
+	if (newsize > alloc) {
+		alloc = (newsize + 32767) & ~32767;
+		buf = xrealloc(buf, alloc);
+		*bufp = buf;
+	}
+	*sizep = newsize;
+	memcpy(buf + size, text, len);
+	if (text != one_line)
+		free(text);
+}
+
+/*
+ * Die unless the object named by sha1 can be read and its type string
+ * equals `expect`.
+ */
+static void check_valid(unsigned char *sha1, const char *expect)
+{
+	char type[20];
+	unsigned long size;
+	void *buf = read_sha1_file(sha1, type, &size);
+	int ok = buf != NULL && strcmp(type, expect) == 0;
+
+	if (!ok)
+		die("%s is not a valid '%s' object", sha1_to_hex(sha1), expect);
+	free(buf);
+}
+
+/*
+ * Having more than two parents is not strange at all, and this is
+ * how multi-way merges are represented.
+ */
+#define MAXPARENT (16)
+static unsigned char parent_sha1[MAXPARENT][20];
+
+static char *commit_tree_usage = "git-commit-tree <sha1> [-p <sha1>]* < changelog";
+
+/*
+ * Report whether parent_sha1[idx] differs from every earlier entry.
+ * Returns 1 when it is new.  Returns 0, after reporting the duplicate
+ * through error(), when an earlier entry holds the same sha1.
+ */
+static int new_parent(int idx)
+{
+	unsigned char *candidate = parent_sha1[idx];
+	int i = 0;
+
+	while (i < idx && memcmp(parent_sha1[i], candidate, 20))
+		i++;
+	if (i == idx)
+		return 1;
+	error("duplicate parent %s ignored", sha1_to_hex(candidate));
+	return 0;
+}
+
+/*
+ * git-commit-tree <sha1> [-p <sha1>]* < changelog
+ *
+ * Builds a commit object for the given tree with the given parents,
+ * taking the commit message from stdin, writes it with
+ * write_sha1_file() and prints the resulting object name.
+ */
+int main(int argc, char **argv)
+{
+	int i;
+	int parents = 0;
+	unsigned char tree_sha1[20];
+	unsigned char commit_sha1[20];
+	char comment[1000];
+	char *buffer;
+	unsigned int size;
+
+	if (argc < 2 || get_sha1_hex(argv[1], tree_sha1) < 0)
+		usage(commit_tree_usage);
+
+	check_valid(tree_sha1, "tree");
+	for (i = 2; i < argc; i += 2) {
+		char *a, *b;
+		a = argv[i]; b = argv[i+1];
+		/*
+		 * parent_sha1[] has room for MAXPARENT entries only; stop
+		 * before get_sha1() is asked to write past the end of it.
+		 */
+		if (parents >= MAXPARENT)
+			die("too many parents (at most %d are supported)", MAXPARENT);
+		if (!b || strcmp(a, "-p") || get_sha1(b, parent_sha1[parents]))
+			usage(commit_tree_usage);
+		check_valid(parent_sha1[parents], "commit");
+		/* A duplicate keeps its slot so the next -p overwrites it */
+		if (new_parent(parents))
+			parents++;
+	}
+	if (!parents)
+		fprintf(stderr, "Committing initial tree %s\n", argv[1]);
+	setup_ident();
+
+	init_buffer(&buffer, &size);
+	add_buffer(&buffer, &size, "tree %s\n", sha1_to_hex(tree_sha1));
+
+	/*
+	 * NOTE! This ordering means that the same exact tree merged with a
+	 * different order of parents will be a _different_ changeset even
+	 * if everything else stays the same.
+	 */
+	for (i = 0; i < parents; i++)
+		add_buffer(&buffer, &size, "parent %s\n", sha1_to_hex(parent_sha1[i]));
+
+	/* Person/date information */
+	add_buffer(&buffer, &size, "author %s\n", git_author_info());
+	add_buffer(&buffer, &size, "committer %s\n\n", git_committer_info());
+
+	/* And add the comment */
+	while (fgets(comment, sizeof(comment), stdin) != NULL)
+		add_buffer(&buffer, &size, "%s", comment);
+
+	/* NOTE(review): the result of write_sha1_file() is not checked - confirm its failure convention and handle it */
+	write_sha1_file(buffer, size, "commit", commit_sha1);
+	printf("%s\n", sha1_to_hex(commit_sha1));
+	return 0;
+}
diff --git a/commit.c b/commit.c
new file mode 100644
index 0000000..d4cd455
--- /dev/null
+++ b/commit.c
@@ -0,0 +1,462 @@
+#include <ctype.h>
+#include "tag.h"
+#include "commit.h"
+#include "cache.h"
+
+struct sort_node
+{
+ /*
+ * the number of children of the associated commit
+ * that also occur in the list being sorted.
+ */
+ unsigned int indegree;
+
+ /*
+ * reference to original list item that we will re-use
+ * on output.
+ */
+ struct commit_list * list_item;
+
+};
+
+const char *commit_type = "commit";
+
+/*
+ * Map the text that follows "--pretty" to a commit format.  An empty
+ * string selects the default; "=raw", "=medium", "=short" and "=full"
+ * select the matching format; anything else is fatal.
+ */
+enum cmit_fmt get_commit_format(const char *arg)
+{
+	static const struct {
+		const char *suffix;
+		enum cmit_fmt fmt;
+	} known[] = {
+		{ "=raw", CMIT_FMT_RAW },
+		{ "=medium", CMIT_FMT_MEDIUM },
+		{ "=short", CMIT_FMT_SHORT },
+		{ "=full", CMIT_FMT_FULL },
+	};
+	unsigned int i;
+
+	if (*arg == '\0')
+		return CMIT_FMT_DEFAULT;
+	for (i = 0; i < sizeof(known) / sizeof(known[0]); i++)
+		if (strcmp(arg, known[i].suffix) == 0)
+			return known[i].fmt;
+	die("invalid --pretty format");
+}
+
+/*
+ * Return obj viewed as a commit when its type is commit_type.
+ * Otherwise report the mismatch through error(), naming sha1, and
+ * return NULL.
+ */
+static struct commit *check_commit(struct object *obj, const unsigned char *sha1)
+{
+	if (obj->type == commit_type)
+		return (struct commit *) obj;
+
+	error("Object %s is a %s, not a commit",
+	      sha1_to_hex(sha1), obj->type);
+	return NULL;
+}
+
+/*
+ * Parse the object named by sha1 and, if it is a tag, keep following
+ * the tagged object until something that is not a tag is reached.
+ *
+ * Returns the commit found that way.  Returns NULL when an object
+ * along the chain cannot be parsed, when a tag has no tagged object,
+ * or when the final object is not a commit (check_commit() reports
+ * that last case).
+ */
+struct commit *lookup_commit_reference(const unsigned char *sha1)
+{
+	struct object *obj = parse_object(sha1);
+
+	/*
+	 * parse_object() may fail at any step of the chain, so the
+	 * result has to be re-checked before every dereference.
+	 */
+	while (obj && obj->type == tag_type) {
+		struct tag *tag = (struct tag *) obj;
+
+		if (!tag->tagged)
+			return NULL;
+		obj = parse_object(tag->tagged->sha1);
+	}
+	if (!obj)
+		return NULL;
+
+	return check_commit(obj, sha1);
+}
+
+/*
+ * Find or create the in-memory commit for sha1 without reading the
+ * object itself.  An object that is already known but has no type yet
+ * is marked as a commit.  A known object of another type makes
+ * check_commit() report an error and yields NULL.
+ */
+struct commit *lookup_commit(const unsigned char *sha1)
+{
+	struct object *obj = lookup_object(sha1);
+	struct commit *fresh;
+
+	if (obj) {
+		if (!obj->type)
+			obj->type = commit_type;
+		return check_commit(obj, sha1);
+	}
+
+	fresh = xmalloc(sizeof(struct commit));
+	memset(fresh, 0, sizeof(struct commit));
+	created_object(sha1, &fresh->object);
+	fresh->object.type = commit_type;
+	return fresh;
+}
+
+/*
+ * Extract the committer timestamp from the tail of a commit header.
+ *
+ * buf must point at the "author" line, which must be followed
+ * directly by the "committer" line.  The number after the first '>'
+ * found from the committer line onwards is returned.  Returns 0 when
+ * either line is missing, when the expected '\n' or '>' is absent, or
+ * when strtoul() reports ULONG_MAX.
+ *
+ * The searches stop at the terminating NUL instead of running on past
+ * the end of a malformed header.  NOTE(review): this assumes the
+ * buffer is NUL-terminated - confirm against read_sha1_file().
+ */
+static unsigned long parse_commit_date(const char *buf)
+{
+	unsigned long date;
+
+	if (strncmp(buf, "author", 6))
+		return 0;
+	buf = strchr(buf, '\n');
+	if (!buf)
+		return 0;
+	buf++;
+	if (strncmp(buf, "committer", 9))
+		return 0;
+	buf = strchr(buf, '>');
+	if (!buf)
+		return 0;
+	date = strtoul(buf + 1, NULL, 10);
+	if (date == ULONG_MAX)
+		date = 0;
+	return date;
+}
+
+/*
+ * Fill in item->tree, item->parents and item->date from the raw commit
+ * text in buffer.
+ *
+ * Returns 0 on success or if the commit was already parsed; otherwise
+ * the value of error().  The commit is marked parsed before anything
+ * is validated, so a second call returns 0 without trying again.
+ * The size argument is not used by this function.
+ */
+int parse_commit_buffer(struct commit *item, void *buffer, unsigned long size)
+{
+ char *bufptr = buffer;
+ unsigned char parent[20];
+ struct commit_list **pptr;
+
+ if (item->object.parsed)
+ return 0;
+ item->object.parsed = 1;
+ if (memcmp(bufptr, "tree ", 5))
+ return error("bogus commit object %s", sha1_to_hex(item->object.sha1));
+ /* "parent" doubles as scratch space for the tree's sha1 here */
+ if (get_sha1_hex(bufptr + 5, parent) < 0)
+ return error("bad tree pointer in commit %s\n", sha1_to_hex(item->object.sha1));
+ item->tree = lookup_tree(parent);
+ if (item->tree)
+ add_ref(&item->object, &item->tree->object);
+ bufptr += 46; /* "tree " + "hex sha1" + "\n" */
+ pptr = &item->parents;
+ while (!memcmp(bufptr, "parent ", 7)) {
+ struct commit *new_parent;
+
+ if (get_sha1_hex(bufptr + 7, parent) || bufptr[47] != '\n')
+ return error("bad parents in commit %s", sha1_to_hex(item->object.sha1));
+ new_parent = lookup_commit(parent);
+ if (new_parent) {
+ /* Insert at the tail pointer so parents keep the order they have in the buffer */
+ pptr = &commit_list_insert(new_parent, pptr)->next;
+ add_ref(&item->object, &new_parent->object);
+ }
+ bufptr += 48; /* "parent " + "hex sha1" + "\n" */
+ }
+ item->date = parse_commit_date(bufptr);
+ return 0;
+}
+
+/*
+ * Read the commit object from the object store and parse it, unless
+ * that has been done already.  On success the raw object text is kept
+ * in item->buffer and 0 is returned.  On failure the text is freed and
+ * the error value is returned.
+ */
+int parse_commit(struct commit *item)
+{
+	char type[20];
+	unsigned long size;
+	void *raw;
+	int ret;
+
+	if (item->object.parsed)
+		return 0;
+
+	raw = read_sha1_file(item->object.sha1, type, &size);
+	if (raw == NULL)
+		return error("Could not read %s",
+			     sha1_to_hex(item->object.sha1));
+	if (strcmp(type, commit_type) != 0) {
+		free(raw);
+		return error("Object %s not a commit",
+			     sha1_to_hex(item->object.sha1));
+	}
+
+	ret = parse_commit_buffer(item, raw, size);
+	if (ret != 0) {
+		free(raw);
+		return ret;
+	}
+	item->buffer = raw;
+	return 0;
+}
+
+/*
+ * Allocate a list node for item and link it in at *list_p, ahead of
+ * whatever was there.  Returns the new node.
+ */
+struct commit_list *commit_list_insert(struct commit *item, struct commit_list **list_p)
+{
+	struct commit_list *node = xmalloc(sizeof(struct commit_list));
+
+	node->next = *list_p;
+	node->item = item;
+	*list_p = node;
+	return node;
+}
+
+/* Free every node of the list; the commits the nodes point at are left alone. */
+void free_commit_list(struct commit_list *list)
+{
+	struct commit_list *next;
+
+	for (; list != NULL; list = next) {
+		next = list->next;
+		free(list);
+	}
+}
+
+/*
+ * Insert item in front of the first list entry whose date is strictly
+ * older, or at the end if there is none.  Returns the new node.
+ */
+struct commit_list * insert_by_date(struct commit *item, struct commit_list **list)
+{
+	struct commit_list **pp;
+
+	for (pp = list; *pp != NULL; pp = &(*pp)->next)
+		if ((*pp)->item->date < item->date)
+			break;
+	return commit_list_insert(item, pp);
+}
+
+
+/*
+ * Replace *list with a list of the same commits ordered by
+ * insert_by_date().  The sorted list is made of newly allocated nodes.
+ * Each node of the input list is freed once its commit has been moved
+ * over, so the old list is not leaked.
+ */
+void sort_by_date(struct commit_list **list)
+{
+	struct commit_list *sorted = NULL;
+	struct commit_list *node = *list;
+
+	while (node) {
+		struct commit_list *next = node->next;
+
+		insert_by_date(node->item, &sorted);
+		free(node);
+		node = next;
+	}
+	*list = sorted;
+}
+
+/*
+ * Remove and return the commit at the head of *list.  Each of its
+ * parents is parsed; a parent that does not carry `mark` in its flags
+ * yet gets the mark and is inserted into the list by date.
+ */
+struct commit *pop_most_recent_commit(struct commit_list **list,
+				      unsigned int mark)
+{
+	struct commit_list *head = *list;
+	struct commit *ret = head->item;
+	struct commit_list *p;
+
+	*list = head->next;
+	free(head);
+
+	for (p = ret->parents; p != NULL; p = p->next) {
+		struct commit *parent = p->item;
+
+		parse_commit(parent);
+		if (parent->object.flags & mark)
+			continue;
+		parent->object.flags |= mark;
+		insert_by_date(parent, list);
+	}
+	return ret;
+}
+
+/*
+ * Generic support for pretty-printing the header
+ */
+/*
+ * Length of the first line within the first len bytes of msg, counting
+ * its '\n'.  If no newline occurs in that range the result is len.  A
+ * NUL byte met before any newline makes the result 0.
+ */
+static int get_one_line(const char *msg, unsigned long len)
+{
+	unsigned long i;
+
+	for (i = 0; i < len; i++) {
+		if (msg[i] == '\n')
+			return (int)(i + 1);
+		if (msg[i] == '\0')
+			return 0;
+	}
+	return (int)len;
+}
+
+/*
+ * Format one "<what>: <name and email>" line into buf from an author
+ * or committer line, where `line` points just past the keyword.  For
+ * CMIT_FMT_MEDIUM a "Date:" line built with show_date() follows.
+ *
+ * Returns the number of characters written, or 0 if the line contains
+ * no '>'.
+ */
+static int add_user_info(const char *what, enum cmit_fmt fmt, char *buf, const char *line)
+{
+	char *cursor = strchr(line, '>');
+	unsigned int namelen;
+	unsigned long when;
+	int tz, written;
+
+	if (cursor == NULL)
+		return 0;
+	cursor++;
+	/* Everything up to and including the '>' is the identity */
+	namelen = cursor - line;
+	when = strtoul(cursor, &cursor, 10);
+	tz = strtol(cursor, NULL, 10);
+
+	written = sprintf(buf, "%s: %.*s\n", what, namelen, line);
+	if (fmt != CMIT_FMT_MEDIUM)
+		return written;
+	return written + sprintf(buf + written, "Date: %s\n", show_date(when, tz));
+}
+
+/*
+ * Non-zero when the first len bytes of line are all whitespace (or len
+ * is 0).
+ *
+ * The byte is converted to unsigned char before it reaches isspace():
+ * passing a negative plain char, as non-ASCII text produces where char
+ * is signed, is undefined behaviour.
+ */
+static int is_empty_line(const char *line, int len)
+{
+	while (len && isspace((unsigned char)line[len-1]))
+		len--;
+	return !len;
+}
+
+/*
+ * Emit the "Merge:" line piece by piece as parent lines are seen.
+ *
+ * line points at the current "parent <sha1>" line and parents is its
+ * 1-based ordinal.  The first parent produces no output.  The second
+ * produces "Merge: <first> <second>\n".  Each later parent turns the
+ * newline that ends the output so far into a space and appends its own
+ * sha1.  Returns the number of characters added at buf.
+ * The fmt argument is not used by this function.
+ */
+static int add_parent_info(enum cmit_fmt fmt, char *buf, const char *line, int parents)
+{
+ int offset = 0;
+ switch (parents) {
+ case 1:
+ break;
+ case 2:
+ /* Go back to the previous line: 40 characters of previous parent, and one '\n' */
+ offset = sprintf(buf, "Merge: %.40s\n", line-41);
+ /* Fallthrough */
+ default:
+ /* Replace the previous '\n' with a space */
+ /* (for a third or later parent offset is 0, so this is buf[-1]: the last byte the caller wrote before buf) */
+ buf[offset-1] = ' ';
+ offset += sprintf(buf + offset, "%.40s\n", line+7);
+ }
+ return offset;
+}
+
+/*
+ * Render the raw commit text msg (len bytes) into buf according to
+ * fmt and return the number of characters written, not counting the
+ * terminating NUL.
+ *
+ * Header lines are copied verbatim for CMIT_FMT_RAW.  Otherwise they
+ * are reduced to the "Merge:", "Author:" and (CMIT_FMT_FULL only)
+ * "Commit:" lines.  Body lines are written after a 4-byte indent, with
+ * leading empty lines dropped.  CMIT_FMT_SHORT stops at the first
+ * empty line after the body has started.  Output stops early, with a
+ * "..." marker, once fewer than linelen + 20 bytes of space remain.
+ */
+unsigned long pretty_print_commit(enum cmit_fmt fmt, const char *msg, unsigned long len, char *buf, unsigned long space)
+{
+ int hdr = 1, body = 0;
+ unsigned long offset = 0;
+ int parents = 0;
+
+ for (;;) {
+ const char *line = msg;
+ int linelen = get_one_line(msg, len);
+
+ if (!linelen)
+ break;
+
+ /*
+ * We want some slop for indentation and a possible
+ * final "...". Thus the "+ 20".
+ */
+ if (offset + linelen + 20 > space) {
+ memcpy(buf + offset, " ...\n", 8);
+ offset += 8;
+ break;
+ }
+
+ msg += linelen;
+ len -= linelen;
+ if (hdr) {
+ /* A line holding only '\n' ends the header */
+ if (linelen == 1) {
+ hdr = 0;
+ buf[offset++] = '\n';
+ continue;
+ }
+ if (fmt == CMIT_FMT_RAW) {
+ memcpy(buf + offset, line, linelen);
+ offset += linelen;
+ continue;
+ }
+ if (!memcmp(line, "parent ", 7)) {
+ if (linelen != 48)
+ die("bad parent line in commit");
+ offset += add_parent_info(fmt, buf + offset, line, ++parents);
+ }
+ if (!memcmp(line, "author ", 7))
+ offset += add_user_info("Author", fmt, buf + offset, line + 7);
+ if (fmt == CMIT_FMT_FULL) {
+ if (!memcmp(line, "committer ", 10))
+ offset += add_user_info("Commit", fmt, buf + offset, line + 10);
+ }
+ continue;
+ }
+
+ if (is_empty_line(line, linelen)) {
+ /* Empty lines before the first non-empty body line are dropped */
+ if (!body)
+ continue;
+ if (fmt == CMIT_FMT_SHORT)
+ break;
+ } else {
+ body = 1;
+ }
+ memset(buf + offset, ' ', 4);
+ memcpy(buf + offset + 4, line, linelen);
+ offset += linelen + 4;
+ }
+ /* Make sure there is an EOLN */
+ /* NOTE(review): with offset == 0 this reads buf[-1] - confirm callers never produce empty output */
+ if (buf[offset - 1] != '\n')
+ buf[offset++] = '\n';
+ buf[offset] = '\0';
+ return offset;
+}
+
+/*
+ * Unlink and free the first node of *stack and return its commit, or
+ * return NULL when the stack is empty.
+ */
+struct commit *pop_commit(struct commit_list **stack)
+{
+	struct commit_list *top = *stack;
+	struct commit *item;
+
+	if (!top)
+		return NULL;
+	item = top->item;
+	*stack = top->next;
+	free(top);
+	return item;
+}
+
+/* Number of entries in the commit's parents list. */
+int count_parents(struct commit * commit)
+{
+	struct commit_list *p = commit->parents;
+	int count = 0;
+
+	while (p) {
+		count++;
+		p = p->next;
+	}
+	return count;
+}
+
+/*
+ * Performs an in-place topological sort on the list supplied.
+ */
+void sort_in_topological_order(struct commit_list ** list)
+{
+ struct commit_list * next = *list;
+ struct commit_list * work = NULL;
+ struct commit_list ** pptr = list;
+ struct sort_node * nodes;
+ struct sort_node * next_nodes;
+ int count = 0;
+
+ /* determine the size of the list */
+ while (next) {
+ next = next->next;
+ count++;
+ }
+ /* allocate an array to help sort the list */
+ nodes = xcalloc(count, sizeof(*nodes));
+ /* link the list to the array */
+ /* (object.util serves as a temporary pointer from each commit to its sort_node) */
+ next_nodes = nodes;
+ next=*list;
+ while (next) {
+ next_nodes->list_item = next;
+ next->item->object.util = next_nodes;
+ next_nodes++;
+ next = next->next;
+ }
+ /* update the indegree */
+ next=*list;
+ while (next) {
+ struct commit_list * parents = next->item->parents;
+ while (parents) {
+ struct commit * parent=parents->item;
+ struct sort_node * pn = (struct sort_node *)parent->object.util;
+
+ /* a parent without a sort_node is not part of the list being sorted */
+ if (pn)
+ pn->indegree++;
+ parents=parents->next;
+ }
+ next=next->next;
+ }
+ /*
+ * find the tips
+ *
+ * tips are nodes not reachable from any other node in the list
+ *
+ * the tips serve as a starting set for the work queue.
+ */
+ next=*list;
+ while (next) {
+ struct sort_node * node = (struct sort_node *)next->item->object.util;
+
+ if (node->indegree == 0) {
+ commit_list_insert(next->item, &work);
+ }
+ next=next->next;
+ }
+ /* process the list in topological order */
+ /* (work is used as a stack: commit_list_insert() pushes at its head and pop_commit() takes from its head) */
+ while (work) {
+ struct commit * work_item = pop_commit(&work);
+ struct sort_node * work_node = (struct sort_node *)work_item->object.util;
+ struct commit_list * parents = work_item->parents;
+
+ while (parents) {
+ struct commit * parent=parents->item;
+ struct sort_node * pn = (struct sort_node *)parent->object.util;
+
+ if (pn) {
+ /*
+ * parents are only enqueued for emission
+ * when all their children have been emitted thereby
+ * guaranteeing topological order.
+ */
+ pn->indegree--;
+ if (!pn->indegree)
+ commit_list_insert(parent, &work);
+ }
+ parents=parents->next;
+ }
+ /*
+ * work_item is a commit all of whose children
+ * have already been emitted. we can emit it now.
+ */
+ /* re-link the original list node at the output tail and clear the temporary util pointer */
+ *pptr = work_node->list_item;
+ pptr = &(*pptr)->next;
+ *pptr = NULL;
+ work_item->object.util = NULL;
+ }
+ free(nodes);
+}
diff --git a/commit.h b/commit.h
new file mode 100644
index 0000000..c24ab21
--- /dev/null
+++ b/commit.h
@@ -0,0 +1,70 @@
+#ifndef COMMIT_H
+#define COMMIT_H
+
+#include "object.h"
+#include "tree.h"
+
+struct commit_list {
+ struct commit *item;
+ struct commit_list *next;
+};
+
+struct commit {
+ struct object object;
+ unsigned long date;
+ struct commit_list *parents;
+ struct tree *tree;
+ char *buffer;
+};
+
+extern const char *commit_type;
+
+struct commit *lookup_commit(const unsigned char *sha1);
+struct commit *lookup_commit_reference(const unsigned char *sha1);
+
+int parse_commit_buffer(struct commit *item, void *buffer, unsigned long size);
+
+int parse_commit(struct commit *item);
+
+struct commit_list * commit_list_insert(struct commit *item, struct commit_list **list_p);
+struct commit_list * insert_by_date(struct commit *item, struct commit_list **list);
+
+void free_commit_list(struct commit_list *list);
+
+void sort_by_date(struct commit_list **list);
+
+/* Commit formats */
+enum cmit_fmt {
+ CMIT_FMT_RAW,
+ CMIT_FMT_MEDIUM,
+ CMIT_FMT_DEFAULT = CMIT_FMT_MEDIUM,
+ CMIT_FMT_SHORT,
+ CMIT_FMT_FULL,
+};
+
+extern enum cmit_fmt get_commit_format(const char *arg);
+extern unsigned long pretty_print_commit(enum cmit_fmt fmt, const char *msg, unsigned long len, char *buf, unsigned long space);
+
+/** Removes the first commit from a list sorted by date, and adds all
+ * of its parents.
+ **/
+struct commit *pop_most_recent_commit(struct commit_list **list,
+ unsigned int mark);
+
+struct commit *pop_commit(struct commit_list **stack);
+
+int count_parents(struct commit * commit);
+
+/*
+ * Performs an in-place topological sort of list supplied.
+ *
+ * Pre-conditions:
+ * all commits in input list and all parents of those
+ * commits must have object.util == NULL
+ *
+ * Post-conditions:
+ * invariant of resulting list is:
+ * a reachable from b => ord(b) < ord(a)
+ */
+void sort_in_topological_order(struct commit_list ** list);
+#endif /* COMMIT_H */
diff --git a/connect.c b/connect.c
new file mode 100644
index 0000000..a910af9
--- /dev/null
+++ b/connect.c
@@ -0,0 +1,232 @@
+#include "cache.h"
+#include "pkt-line.h"
+#include "quote.h"
+#include <sys/wait.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <netdb.h>
+
+/*
+ * Read all the refs from the other end
+ */
+struct ref **get_remote_heads(int in, struct ref **list, int nr_match, char **match)
+{
+ *list = NULL;
+ for (;;) {
+ struct ref *ref;
+ unsigned char old_sha1[20];
+ static char buffer[1000];
+ char *name;
+ int len;
+
+ len = packet_read_line(in, buffer, sizeof(buffer));
+ if (!len)
+ break;
+ if (buffer[len-1] == '\n')
+ buffer[--len] = 0;
+
+ if (len < 42 || get_sha1_hex(buffer, old_sha1) || buffer[40] != ' ')
+ die("protocol error: expected sha/ref, got '%s'", buffer);
+ name = buffer + 41;
+ if (nr_match && !path_match(name, nr_match, match))
+ continue;
+ ref = xmalloc(sizeof(*ref) + len - 40);
+ memcpy(ref->old_sha1, old_sha1, 20);
+ memset(ref->new_sha1, 0, 20);
+ memcpy(ref->name, buffer + 41, len - 40);
+ ref->next = NULL;
+ *list = ref;
+ list = &ref->next;
+ }
+ return list;
+}
+
+int get_ack(int fd, unsigned char *result_sha1)
+{
+ static char line[1000];
+ int len = packet_read_line(fd, line, sizeof(line));
+ /* returns 0 on "NAK", 1 on "ACK <sha1>" (sha1 stored in result_sha1); anything else goes to die() */
+ if (!len)
+ die("git-fetch-pack: expected ACK/NAK, got EOF");
+ if (line[len-1] == '\n')
+ line[--len] = 0;
+ if (!strcmp(line, "NAK"))
+ return 0;
+ if (!strncmp(line, "ACK ", 4)) { /* compare all four bytes so line+4 is never past the terminator */
+ if (!get_sha1_hex(line+4, result_sha1))
+ return 1;
+ }
+ die("git-fetch-pack: expected ACK/NAK, got '%s'", line);
+}
+
+int path_match(const char *path, int nr, char **match)
+{
+ int i;
+ int pathlen = strlen(path);
+ /* Return 1 if some match[i] equals path or is a '/'-bounded tail of it, else 0. */
+ for (i = 0; i < nr; i++) {
+ char *s = match[i];
+ int len = strlen(s);
+ /* empty patterns (including ones consumed below) never match */
+ if (!len || len > pathlen)
+ continue;
+ if (memcmp(path + pathlen - len, s, len)) /* pattern must be the tail of path */
+ continue;
+ if (pathlen > len && path[pathlen - len - 1] != '/') /* ...starting at a path component boundary */
+ continue;
+ *s = 0; /* consume the pattern: it becomes empty and cannot match again */
+ return 1;
+ }
+ return 0;
+}
+
+enum protocol {
+ PROTO_LOCAL = 1,
+ PROTO_SSH,
+ PROTO_GIT,
+};
+
+static enum protocol get_protocol(const char *name)
+{
+ if (!strcmp(name, "ssh"))
+ return PROTO_SSH;
+ if (!strcmp(name, "git"))
+ return PROTO_GIT;
+ die("I don't handle protocol '%s'", name);
+}
+
+#define STR_(s) # s
+#define STR(s) STR_(s)
+
+static int git_tcp_connect(int fd[2], const char *prog, char *host, char *path)
+{
+ int sockfd = -1;
+ char *colon, *end;
+ char *port = STR(DEFAULT_GIT_PORT);
+ struct addrinfo hints, *ai0, *ai;
+ int gai;
+
+ if (host[0] == '[') {
+ end = strchr(host + 1, ']');
+ if (end) {
+ *end = 0;
+ end++;
+ host++;
+ } else
+ end = host;
+ } else
+ end = host;
+ colon = strchr(end, ':');
+
+ if (colon) {
+ *colon = 0;
+ port = colon + 1;
+ }
+
+ memset(&hints, 0, sizeof(hints));
+ hints.ai_socktype = SOCK_STREAM;
+ hints.ai_protocol = IPPROTO_TCP;
+
+ gai = getaddrinfo(host, port, &hints, &ai);
+ if (gai)
+ die("Unable to look up %s (%s)", host, gai_strerror(gai));
+
+ for (ai0 = ai; ai; ai = ai->ai_next) {
+ sockfd = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol);
+ if (sockfd < 0)
+ continue;
+ if (connect(sockfd, ai->ai_addr, ai->ai_addrlen) < 0) {
+ close(sockfd);
+ sockfd = -1;
+ continue;
+ }
+ break;
+ }
+
+ freeaddrinfo(ai0);
+
+ if (sockfd < 0)
+ die("unable to connect a socket (%s)", strerror(errno));
+
+ fd[0] = sockfd;
+ fd[1] = sockfd;
+ packet_write(sockfd, "%s %s\n", prog, path);
+ return 0;
+}
+
+/* Run "prog" for the repository named by url; fd[0] reads from it, fd[1] writes to it.
+ * Yeah, yeah, fixme. Need to pass in the heads etc.
+ */
+int git_connect(int fd[2], char *url, const char *prog)
+{
+ char command[1024];
+ char *host, *path;
+ char *colon;
+ int pipefd[2][2];
+ pid_t pid;
+ enum protocol protocol;
+
+ host = NULL;
+ path = url;
+ colon = strchr(url, ':');
+ protocol = PROTO_LOCAL; /* no ':' at all: plain local path */
+ if (colon) {
+ *colon = 0; /* url is modified in place: split at the first ':' */
+ host = url;
+ path = colon+1;
+ protocol = PROTO_SSH; /* "host:path" form unless a "//" follows */
+ if (!memcmp(path, "//", 2)) { /* "scheme://host/path" form */
+ char *slash = strchr(path + 2, '/');
+ if (slash) {
+ int nr = slash - path - 2; /* length of the host part */
+ memmove(path, path+2, nr); /* slide the host name over the "//" */
+ path[nr] = 0;
+ protocol = get_protocol(url); /* url now holds only the scheme */
+ host = path;
+ path = slash; /* keeps its leading '/' */
+ }
+ }
+ }
+
+ if (protocol == PROTO_GIT)
+ return git_tcp_connect(fd, prog, host, path); /* no child process; returns 0 */
+
+ if (pipe(pipefd[0]) < 0 || pipe(pipefd[1]) < 0)
+ die("unable to create pipe pair for communication");
+ pid = fork();
+ if (!pid) {
+ snprintf(command, sizeof(command), "%s %s", prog,
+ sq_quote(path));
+ dup2(pipefd[1][0], 0); /* child stdin comes from the parent's fd[1] */
+ dup2(pipefd[0][1], 1); /* child stdout goes to the parent's fd[0] */
+ close(pipefd[0][0]);
+ close(pipefd[0][1]);
+ close(pipefd[1][0]);
+ close(pipefd[1][1]);
+ if (protocol == PROTO_SSH)
+ execlp("ssh", "ssh", host, command, NULL);
+ else
+ execlp("sh", "sh", "-c", command, NULL);
+ die("exec failed");
+ }
+ fd[0] = pipefd[0][0];
+ fd[1] = pipefd[1][1];
+ close(pipefd[0][1]);
+ close(pipefd[1][0]);
+ return pid; /* NOTE(review): a failed fork() (-1) is returned unchecked */
+}
+
+int finish_connect(pid_t pid)
+{
+ int ret;
+ /* Wait for the child, retrying when waitpid() is interrupted; returns waitpid()'s result. */
+ for (;;) {
+ ret = waitpid(pid, NULL, 0);
+ if (ret >= 0) /* success yields the pid, not 0; errno is stale here and must not be consulted */
+ break;
+ if (errno != EINTR)
+ break;
+ }
+ return ret;
+}
diff --git a/convert-cache.c b/convert-cache.c
new file mode 100644
index 0000000..8916a36
--- /dev/null
+++ b/convert-cache.c
@@ -0,0 +1,326 @@
+#define _XOPEN_SOURCE /* glibc2 needs this */
+#include <time.h>
+#include <ctype.h>
+#include "cache.h"
+
+struct entry {
+ unsigned char old_sha1[20];
+ unsigned char new_sha1[20];
+ int converted;
+};
+
+#define MAXOBJECTS (1000000)
+
+static struct entry *convert[MAXOBJECTS];
+static int nr_convert;
+
+static struct entry * convert_entry(unsigned char *sha1);
+
+static struct entry *insert_new(unsigned char *sha1, int pos)
+{
+ struct entry *new = xmalloc(sizeof(struct entry));
+ memset(new, 0, sizeof(*new));
+ memcpy(new->old_sha1, sha1, 20);
+ memmove(convert + pos + 1, convert + pos, (nr_convert - pos) * sizeof(struct entry *));
+ convert[pos] = new;
+ nr_convert++;
+ if (nr_convert == MAXOBJECTS)
+ die("you're kidding me - hit maximum object limit");
+ return new;
+}
+
+static struct entry *lookup_entry(unsigned char *sha1)
+{
+ int low = 0, high = nr_convert;
+ /* binary search of convert[] by old_sha1; entries are inserted at the position found here */
+ while (low < high) {
+ int next = (low + high) / 2;
+ struct entry *n = convert[next];
+ int cmp = memcmp(sha1, n->old_sha1, 20);
+ if (!cmp)
+ return n;
+ if (cmp < 0) {
+ high = next;
+ continue;
+ }
+ low = next+1;
+ }
+ return insert_new(sha1, low); /* not found: create a fresh (unconverted) entry at slot low */
+}
+
+static void convert_binary_sha1(void *buffer)
+{
+ struct entry *entry = convert_entry(buffer);
+ memcpy(buffer, entry->new_sha1, 20);
+}
+
+static void convert_ascii_sha1(void *buffer)
+{
+ unsigned char sha1[20];
+ struct entry *entry;
+
+ if (get_sha1_hex(buffer, sha1))
+ die("expected sha1, got '%s'", buffer);
+ entry = convert_entry(sha1);
+ memcpy(buffer, sha1_to_hex(entry->new_sha1), 40);
+}
+
+static unsigned int convert_mode(unsigned int mode)
+{
+ unsigned int newmode;
+
+ newmode = mode & S_IFMT;
+ if (S_ISREG(mode))
+ newmode |= (mode & 0100) ? 0755 : 0644;
+ return newmode;
+}
+
+static int write_subdirectory(void *buffer, unsigned long size, const char *base, int baselen, unsigned char *result_sha1)
+{
+ char *new = xmalloc(size);
+ unsigned long newlen = 0;
+ unsigned long used;
+
+ used = 0;
+ while (size) {
+ int len = 21 + strlen(buffer);
+ char *path = strchr(buffer, ' ');
+ unsigned char *sha1;
+ unsigned int mode;
+ char *slash, *origpath;
+
+ if (!path || sscanf(buffer, "%o", &mode) != 1)
+ die("bad tree conversion");
+ mode = convert_mode(mode);
+ path++;
+ if (memcmp(path, base, baselen))
+ break;
+ origpath = path;
+ path += baselen;
+ slash = strchr(path, '/');
+ if (!slash) {
+ newlen += sprintf(new + newlen, "%o %s", mode, path);
+ new[newlen++] = '\0';
+ memcpy(new + newlen, buffer + len - 20, 20);
+ newlen += 20;
+
+ used += len;
+ size -= len;
+ buffer += len;
+ continue;
+ }
+
+ newlen += sprintf(new + newlen, "%o %.*s", S_IFDIR, (int)(slash - path), path);
+ new[newlen++] = 0;
+ sha1 = (unsigned char *)(new + newlen);
+ newlen += 20;
+
+ len = write_subdirectory(buffer, size, origpath, slash-origpath+1, sha1);
+
+ used += len;
+ size -= len;
+ buffer += len;
+ }
+
+ write_sha1_file(new, newlen, "tree", result_sha1);
+ free(new);
+ return used;
+}
+
+static void convert_tree(void *buffer, unsigned long size, unsigned char *result_sha1)
+{
+ void *orig_buffer = buffer;
+ unsigned long orig_size = size;
+ /* first rewrite every entry's sha1 in place, then let write_subdirectory() emit the new tree(s) */
+ while (size) {
+ int len = 1+strlen(buffer);
+
+ /* check the entry fits before touching its 20-byte sha1 */
+ len += 20;
+ if (len > size)
+ die("corrupt tree object");
+ convert_binary_sha1(buffer + len - 20);
+ size -= len;
+ buffer += len;
+ }
+
+ write_subdirectory(orig_buffer, orig_size, "", 0, result_sha1);
+}
+
+static unsigned long parse_oldstyle_date(const char *buf)
+{
+ char c, *p;
+ char buffer[100];
+ struct tm tm;
+ const char *formats[] = {
+ "%c",
+ "%a %b %d %T",
+ "%Z",
+ "%Y",
+ " %Y",
+ NULL
+ };
+ /* We only ever did a few timezones in the bad old format .. */
+ const char *timezones[] = {
+ "PDT", "PST", "CEST", NULL
+ };
+ const char **fmt = formats;
+ /* Parse the first line of buf: try each strptime format in turn, skipping a known timezone name when one fails. */
+ p = buffer;
+ while (isspace(c = *buf))
+ buf++;
+ while ((c = *buf++) != '\n' && c && p < buffer + sizeof(buffer) - 1) /* never overrun buffer or the input */
+ *p++ = c;
+ *p++ = 0;
+ buf = buffer;
+ memset(&tm, 0, sizeof(tm));
+ do {
+ const char *next = strptime(buf, *fmt, &tm);
+ if (next) {
+ if (!*next)
+ return mktime(&tm);
+ buf = next;
+ } else {
+ const char **p = timezones;
+ while (isspace(*buf))
+ buf++;
+ while (*p) {
+ if (!strncmp(buf, *p, strlen(*p))) { /* strncmp stops at buf's terminator */
+ buf += strlen(*p);
+ break;
+ }
+ p++;
+ }
+ }
+ fmt++;
+ } while (*buf && *fmt);
+ printf("left: %s\n", buf);
+ return mktime(&tm);
+}
+
+static int convert_date_line(char *dst, void **buf, unsigned long *sp)
+{
+ unsigned long size = *sp;
+ char *line = *buf;
+ char *next = strchr(line, '\n');
+ char *date = strchr(line, '>');
+ int len;
+
+ if (!next || !date)
+ die("missing or bad author/committer line %s", line);
+ next++; date += 2;
+
+ *buf = next;
+ *sp = size - (next - line);
+
+ len = date - line;
+ memcpy(dst, line, len);
+ dst += len;
+
+ /* Is it already in new format? */
+ if (isdigit(*date)) {
+ int datelen = next - date;
+ memcpy(dst, date, datelen);
+ return len + datelen;
+ }
+
+ /*
+ * Hacky hacky: one of the sparse old-style commits does not have
+ * any date at all, but we can fake it by using the committer date.
+ */
+ if (*date == '\n' && strchr(next, '>'))
+ date = strchr(next, '>')+2;
+
+ return len + sprintf(dst, "%lu -0700\n", parse_oldstyle_date(date));
+}
+
+static void convert_date(void *buffer, unsigned long size, unsigned char *result_sha1)
+{
+ char *new = xmalloc(size + 100);
+ unsigned long newlen = 0;
+
+ // "tree <sha1>\n"
+ memcpy(new + newlen, buffer, 46);
+ newlen += 46;
+ buffer += 46;
+ size -= 46;
+
+ // "parent <sha1>\n"
+ while (!memcmp(buffer, "parent ", 7)) {
+ memcpy(new + newlen, buffer, 48);
+ newlen += 48;
+ buffer += 48;
+ size -= 48;
+ }
+
+ // "author xyz <xyz> date"
+ newlen += convert_date_line(new + newlen, &buffer, &size);
+ // "committer xyz <xyz> date"
+ newlen += convert_date_line(new + newlen, &buffer, &size);
+
+ // Rest
+ memcpy(new + newlen, buffer, size);
+ newlen += size;
+
+ write_sha1_file(new, newlen, "commit", result_sha1);
+ free(new);
+}
+
+static void convert_commit(void *buffer, unsigned long size, unsigned char *result_sha1)
+{
+ void *orig_buffer = buffer;
+ unsigned long orig_size = size;
+
+ if (memcmp(buffer, "tree ", 5))
+ die("Bad commit '%s'", buffer);
+ convert_ascii_sha1(buffer+5);
+ buffer += 46; /* "tree " + "hex sha1" + "\n" */
+ while (!memcmp(buffer, "parent ", 7)) {
+ convert_ascii_sha1(buffer+7);
+ buffer += 48;
+ }
+ convert_date(orig_buffer, orig_size, result_sha1);
+}
+
+static struct entry * convert_entry(unsigned char *sha1)
+{
+ struct entry *entry = lookup_entry(sha1);
+ char type[20];
+ void *buffer, *data;
+ unsigned long size;
+
+ if (entry->converted)
+ return entry;
+ data = read_sha1_file(sha1, type, &size);
+ if (!data)
+ die("unable to read object %s", sha1_to_hex(sha1));
+
+ buffer = xmalloc(size);
+ memcpy(buffer, data, size);
+
+ if (!strcmp(type, "blob")) {
+ write_sha1_file(buffer, size, "blob", entry->new_sha1);
+ } else if (!strcmp(type, "tree"))
+ convert_tree(buffer, size, entry->new_sha1);
+ else if (!strcmp(type, "commit"))
+ convert_commit(buffer, size, entry->new_sha1);
+ else
+ die("unknown object type '%s' in %s", type, sha1_to_hex(sha1));
+ entry->converted = 1;
+ free(buffer);
+ free(data);
+ return entry;
+}
+
+int main(int argc, char **argv)
+{
+ unsigned char sha1[20];
+ struct entry *entry;
+
+ if (argc != 2 || get_sha1(argv[1], sha1))
+ usage("git-convert-cache <sha1>");
+
+ entry = convert_entry(sha1);
+ printf("new sha1: %s\n", sha1_to_hex(entry->new_sha1));
+ return 0;
+}
diff --git a/count-delta.c b/count-delta.c
new file mode 100644
index 0000000..7559ff6
--- /dev/null
+++ b/count-delta.c
@@ -0,0 +1,79 @@
+/*
+ * Copyright (C) 2005 Junio C Hamano
+ * The delta-parsing part is almost straight copy of patch-delta.c
+ * which is (C) 2005 Nicolas Pitre <nico@cam.org>.
+ */
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include "delta.h"
+#include "count-delta.h"
+
+/*
+ * NOTE. We do not _interpret_ delta fully. As an approximation, we
+ * just count the number of bytes that are copied from the source, and
+ * the number of literal data bytes that are inserted.
+ *
+ * Number of bytes that are _not_ copied from the source is deletion,
+ * and number of inserted literal bytes are addition, so sum of them
+ * is the extent of damage. xdelta can express an edit that copies
+ * data inside of the destination which originally came from the
+ * source. We do not count that in the following routine, so we are
+ * undercounting the source material that remains in the final output
+ * that way.
+ */
+int count_delta(void *delta_buf, unsigned long delta_size,
+ unsigned long *src_copied, unsigned long *literal_added)
+{
+ unsigned long copied_from_source, added_literal;
+ const unsigned char *data, *top;
+ unsigned char cmd;
+ unsigned long src_size, dst_size, out;
+ /* returns 0 and fills *src_copied / *literal_added, or -1 when the delta is too short or inconsistent */
+ if (delta_size < DELTA_SIZE_MIN)
+ return -1;
+
+ data = delta_buf;
+ top = data + delta_size; /* byte-pointer arithmetic; void * arithmetic is a compiler extension */
+
+ src_size = get_delta_hdr_size(&data);
+ dst_size = get_delta_hdr_size(&data);
+
+ added_literal = copied_from_source = out = 0;
+ while (data < top) {
+ cmd = *data++;
+ if (cmd & 0x80) {
+ unsigned long cp_off = 0, cp_size = 0;
+ if (cmd & 0x01) cp_off = *data++;
+ if (cmd & 0x02) cp_off |= (*data++ << 8);
+ if (cmd & 0x04) cp_off |= (*data++ << 16);
+ if (cmd & 0x08) cp_off |= ((unsigned long)*data++ << 24); /* widen first: int << 24 can hit the sign bit */
+ if (cmd & 0x10) cp_size = *data++;
+ if (cmd & 0x20) cp_size |= (*data++ << 8);
+ if (cp_size == 0) cp_size = 0x10000;
+
+ if (cmd & 0x40)
+ /* copy from dst */
+ ;
+ else
+ copied_from_source += cp_size;
+ out += cp_size;
+ } else {
+ /* write literal into dst */
+ added_literal += cmd;
+ out += cmd;
+ data += cmd;
+ }
+ }
+
+ /* sanity check */
+ if (data != top || out != dst_size)
+ return -1;
+
+ /* delete size is what was _not_ copied from source.
+ * edit size is that and literal additions.
+ */
+ *src_copied = copied_from_source;
+ *literal_added = added_literal;
+ return 0;
+}
diff --git a/count-delta.h b/count-delta.h
new file mode 100644
index 0000000..7359629
--- /dev/null
+++ b/count-delta.h
@@ -0,0 +1,10 @@
+/*
+ * Copyright (C) 2005 Junio C Hamano
+ */
+#ifndef COUNT_DELTA_H
+#define COUNT_DELTA_H
+
+int count_delta(void *, unsigned long,
+ unsigned long *src_copied, unsigned long *literal_added);
+
+#endif
diff --git a/csum-file.c b/csum-file.c
new file mode 100644
index 0000000..907efbf
--- /dev/null
+++ b/csum-file.c
@@ -0,0 +1,147 @@
+/*
+ * csum-file.c
+ *
+ * Copyright (C) 2005 Linus Torvalds
+ *
+ * Simple file write infrastructure for writing SHA1-summed
+ * files. Useful when you write a file that you want to be
+ * able to verify hasn't been messed with afterwards.
+ */
+#include "cache.h"
+#include "csum-file.h"
+
+static int sha1flush(struct sha1file *f, unsigned int count)
+{
+ void *buf = f->buffer;
+
+ for (;;) {
+ int ret = write(f->fd, buf, count);
+ if (ret > 0) {
+ buf += ret;
+ count -= ret;
+ if (count)
+ continue;
+ return 0;
+ }
+ if (!ret)
+ die("sha1 file '%s' write error. Out of diskspace", f->name);
+ if (errno == EAGAIN || errno == EINTR)
+ continue;
+ die("sha1 file '%s' write error (%s)", f->name, strerror(errno));
+ }
+}
+
+int sha1close(struct sha1file *f, unsigned char *result, int update)
+{
+ unsigned offset = f->offset;
+ if (offset) { /* hash and write out whatever is still buffered */
+ SHA1_Update(&f->ctx, f->buffer, offset);
+ sha1flush(f, offset);
+ }
+ SHA1_Final(f->buffer, &f->ctx); /* the digest is stored at the start of the write buffer */
+ if (result) /* optional: hand the 20-byte digest back to the caller */
+ memcpy(result, f->buffer, 20);
+ if (update) /* optional: append the 20-byte digest to the file itself */
+ sha1flush(f, 20);
+ if (close(f->fd))
+ die("%s: sha1 file error on close (%s)", f->name, strerror(errno));
+ return 0; /* NOTE(review): f itself is not freed here */
+}
+
+int sha1write(struct sha1file *f, void *buf, unsigned int count)
+{
+ while (count) {
+ unsigned offset = f->offset;
+ unsigned left = sizeof(f->buffer) - offset;
+ unsigned nr = count > left ? left : count;
+
+ memcpy(f->buffer + offset, buf, nr);
+ count -= nr;
+ offset += nr;
+ buf += nr;
+ left -= nr;
+ if (!left) {
+ SHA1_Update(&f->ctx, f->buffer, offset);
+ sha1flush(f, offset);
+ offset = 0;
+ }
+ f->offset = offset;
+ }
+ return 0;
+}
+
+struct sha1file *sha1create(const char *fmt, ...)
+{
+ struct sha1file *f;
+ unsigned len;
+ va_list arg;
+ int fd;
+
+ f = xmalloc(sizeof(*f));
+
+ va_start(arg, fmt);
+ len = vsnprintf(f->name, sizeof(f->name), fmt, arg);
+ va_end(arg);
+ if (len >= PATH_MAX)
+ die("you wascally wabbit, you");
+ f->namelen = len;
+
+ fd = open(f->name, O_CREAT | O_EXCL | O_WRONLY, 0666);
+ if (fd < 0)
+ die("unable to open %s (%s)", f->name, strerror(errno));
+ f->fd = fd;
+ f->error = 0;
+ f->offset = 0;
+ SHA1_Init(&f->ctx);
+ return f;
+}
+
+struct sha1file *sha1fd(int fd, const char *name)
+{
+ struct sha1file *f;
+ unsigned len;
+
+ f = xmalloc(sizeof(*f));
+
+ len = strlen(name);
+ if (len >= PATH_MAX)
+ die("you wascally wabbit, you");
+ f->namelen = len;
+ memcpy(f->name, name, len+1);
+
+ f->fd = fd;
+ f->error = 0;
+ f->offset = 0;
+ SHA1_Init(&f->ctx);
+ return f;
+}
+
+int sha1write_compressed(struct sha1file *f, void *in, unsigned int size)
+{
+ z_stream stream;
+ unsigned long maxsize;
+ void *out;
+
+ memset(&stream, 0, sizeof(stream));
+ deflateInit(&stream, Z_DEFAULT_COMPRESSION);
+ maxsize = deflateBound(&stream, size);
+ out = xmalloc(maxsize);
+
+ /* Compress it */
+ stream.next_in = in;
+ stream.avail_in = size;
+
+ stream.next_out = out;
+ stream.avail_out = maxsize;
+
+ while (deflate(&stream, Z_FINISH) == Z_OK)
+ /* nothing */;
+ deflateEnd(&stream);
+
+ size = stream.total_out;
+ sha1write(f, out, size);
+ free(out);
+ return size;
+}
+
+
diff --git a/csum-file.h b/csum-file.h
new file mode 100644
index 0000000..776cfb1
--- /dev/null
+++ b/csum-file.h
@@ -0,0 +1,19 @@
+#ifndef CSUM_FILE_H
+#define CSUM_FILE_H
+
+/* A SHA1-protected file */
+struct sha1file {
+ int fd, error;
+ unsigned int offset, namelen;
+ SHA_CTX ctx;
+ char name[PATH_MAX];
+ unsigned char buffer[8192];
+};
+
+extern struct sha1file *sha1fd(int fd, const char *name);
+extern struct sha1file *sha1create(const char *fmt, ...);
+extern int sha1close(struct sha1file *, unsigned char *, int);
+extern int sha1write(struct sha1file *, void *, unsigned int);
+extern int sha1write_compressed(struct sha1file *, void *, unsigned int);
+
+#endif
diff --git a/daemon.c b/daemon.c
new file mode 100644
index 0000000..eeff9e7
--- /dev/null
+++ b/daemon.c
@@ -0,0 +1,356 @@
+#include "cache.h"
+#include "pkt-line.h"
+#include <signal.h>
+#include <sys/wait.h>
+#include <sys/socket.h>
+#include <netdb.h>
+#include <netinet/in.h>
+
+static const char daemon_usage[] = "git-daemon [--inetd | --port=n]";
+
+static int upload(char *dir, int dirlen)
+{
+ if (chdir(dir) < 0)
+ return -1;
+ chdir(".git");
+
+ /*
+ * Security on the cheap.
+ *
+ * We want a readable HEAD, usable "objects" directory, and
+ * a "git-daemon-export-ok" flag that says that the other side
+ * is ok with us doing this.
+ */
+ if (access("git-daemon-export-ok", F_OK) ||
+ access("objects/00", X_OK) ||
+ access("HEAD", R_OK))
+ return -1;
+
+ /*
+ * We'll ignore SIGTERM from now on, we have a
+ * good client.
+ */
+ signal(SIGTERM, SIG_IGN);
+
+ /* git-upload-pack only ever reads stuff, so this is safe */
+ execlp("git-upload-pack", "git-upload-pack", ".", NULL);
+ return -1;
+}
+
+static int execute(void)
+{
+ static char line[1000];
+ int len;
+
+ len = packet_read_line(0, line, sizeof(line));
+
+ if (len && line[len-1] == '\n')
+ line[--len] = 0;
+
+ if (!strncmp("git-upload-pack /", line, 17))
+ return upload(line + 16, len - 16);
+
+ fprintf(stderr, "got bad connection '%s'\n", line);
+ return -1;
+}
+
+
+/*
+ * We count spawned/reaped separately, just to avoid any
+ * races when updating them from signals. The SIGCHLD handler
+ * will only update children_reaped, and the fork logic will
+ * only update children_spawned.
+ *
+ * MAX_CHILDREN should be a power-of-two to make the modulus
+ * operation cheap. It should also be at least twice
+ * the maximum number of connections we will ever allow.
+ */
+#define MAX_CHILDREN 128
+
+static int max_connections = 25;
+
+/* These are updated by the signal handler */
+static volatile unsigned int children_reaped = 0;
+pid_t dead_child[MAX_CHILDREN];
+
+/* These are updated by the main loop */
+static unsigned int children_spawned = 0;
+static unsigned int children_deleted = 0;
+
+struct child {
+ pid_t pid;
+ socklen_t addrlen;
+ struct sockaddr_storage address;
+} live_child[MAX_CHILDREN];
+
+static void add_child(int idx, pid_t pid, struct sockaddr *addr, socklen_t addrlen)
+{
+ live_child[idx].pid = pid;
+ live_child[idx].addrlen = addrlen;
+ memcpy(&live_child[idx].address, addr, addrlen);
+}
+
+/*
+ * Walk from "deleted" to "spawned", and remove child "pid".
+ *
+ * We move everything up by one, since the new "deleted" will
+ * be one higher.
+ */
+static void remove_child(pid_t pid, unsigned deleted, unsigned spawned)
+{
+ struct child n;
+
+ deleted %= MAX_CHILDREN;
+ spawned %= MAX_CHILDREN;
+ if (live_child[deleted].pid == pid) {
+ live_child[deleted].pid = -1;
+ return;
+ }
+ n = live_child[deleted];
+ for (;;) {
+ struct child m;
+ deleted = (deleted + 1) % MAX_CHILDREN;
+ if (deleted == spawned)
+ die("could not find dead child %d\n", pid);
+ m = live_child[deleted];
+ live_child[deleted] = n;
+ if (m.pid == pid)
+ return;
+ n = m;
+ }
+}
+
+/*
+ * This gets called if the number of connections grows
+ * past "max_connections".
+ *
+ * We _should_ start off by searching for connections
+ * from the same IP, and if there is some address with
+ * multiple connections, we should kill that first.
+ *
+ * As it is, we just "randomly" kill 25% of the connections,
+ * and our pseudo-random generator sucks too. I have no
+ * shame.
+ *
+ * Really, this is just a place-holder for a _real_ algorithm.
+ */
+static void kill_some_children(int signo, unsigned start, unsigned stop)
+{
+ start %= MAX_CHILDREN;
+ stop %= MAX_CHILDREN;
+ while (start != stop) {
+ if (!(start & 3))
+ kill(live_child[start].pid, signo);
+ start = (start + 1) % MAX_CHILDREN;
+ }
+}
+
+static void check_max_connections(void)
+{
+ for (;;) {
+ int active;
+ unsigned spawned, reaped, deleted;
+
+ spawned = children_spawned;
+ reaped = children_reaped;
+ deleted = children_deleted;
+
+ while (deleted < reaped) {
+ pid_t pid = dead_child[deleted % MAX_CHILDREN];
+ remove_child(pid, deleted, spawned);
+ deleted++;
+ }
+ children_deleted = deleted;
+
+ active = spawned - deleted;
+ if (active <= max_connections)
+ break;
+
+ /* Kill some unstarted connections with SIGTERM */
+ kill_some_children(SIGTERM, deleted, spawned);
+ if (active <= max_connections << 1)
+ break;
+
+ /* If the SIGTERM thing isn't helping use SIGKILL */
+ kill_some_children(SIGKILL, deleted, spawned);
+ sleep(1);
+ }
+}
+
+static void handle(int incoming, struct sockaddr *addr, socklen_t addrlen)
+{
+ pid_t pid = fork();
+
+ if (pid) {
+ unsigned idx;
+
+ close(incoming);
+ if (pid < 0)
+ return;
+
+ idx = children_spawned % MAX_CHILDREN;
+ children_spawned++;
+ add_child(idx, pid, addr, addrlen);
+
+ check_max_connections();
+ return;
+ }
+
+ dup2(incoming, 0);
+ dup2(incoming, 1);
+ close(incoming);
+ exit(execute());
+}
+
+static void child_handler(int signo)
+{
+ for (;;) {
+ pid_t pid = waitpid(-1, NULL, WNOHANG);
+
+ if (pid > 0) {
+ unsigned reaped = children_reaped;
+ dead_child[reaped % MAX_CHILDREN] = pid;
+ children_reaped = reaped + 1;
+ continue;
+ }
+ break;
+ }
+}
+
+static int serve(int port)
+{
+ struct addrinfo hints, *ai0, *ai;
+ int gai;
+ int socknum = 0, *socklist = NULL;
+ int maxfd = -1;
+ fd_set fds_init, fds;
+ char pbuf[NI_MAXSERV];
+
+ signal(SIGCHLD, child_handler);
+
+ sprintf(pbuf, "%d", port);
+ memset(&hints, 0, sizeof(hints));
+ hints.ai_family = AF_UNSPEC;
+ hints.ai_socktype = SOCK_STREAM;
+ hints.ai_protocol = IPPROTO_TCP;
+ hints.ai_flags = AI_PASSIVE;
+
+ gai = getaddrinfo(NULL, pbuf, &hints, &ai0);
+ if (gai)
+ die("getaddrinfo() failed: %s\n", gai_strerror(gai));
+
+ FD_ZERO(&fds_init);
+
+ for (ai = ai0; ai; ai = ai->ai_next) {
+ int sockfd;
+ int *newlist;
+
+ sockfd = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol);
+ if (sockfd < 0)
+ continue;
+ if (sockfd >= FD_SETSIZE) {
+ error("too large socket descriptor.");
+ close(sockfd);
+ continue;
+ }
+
+#ifdef IPV6_V6ONLY
+ if (ai->ai_family == AF_INET6) {
+ int on = 1;
+ setsockopt(sockfd, IPPROTO_IPV6, IPV6_V6ONLY,
+ &on, sizeof(on));
+ /* Note: error is not fatal */
+ }
+#endif
+
+ if (bind(sockfd, ai->ai_addr, ai->ai_addrlen) < 0) {
+ close(sockfd);
+ continue; /* not fatal */
+ }
+ if (listen(sockfd, 5) < 0) {
+ close(sockfd);
+ continue; /* not fatal */
+ }
+
+ newlist = realloc(socklist, sizeof(int) * (socknum + 1));
+ if (!newlist)
+ die("memory allocation failed: %s", strerror(errno));
+
+ socklist = newlist;
+ socklist[socknum++] = sockfd;
+
+ FD_SET(sockfd, &fds_init);
+ if (maxfd < sockfd)
+ maxfd = sockfd;
+ }
+
+ freeaddrinfo(ai0);
+
+ if (socknum == 0)
+ die("unable to allocate any listen sockets on port %u", port);
+
+ for (;;) {
+ int i;
+ fds = fds_init;
+
+ if (select(maxfd + 1, &fds, NULL, NULL, NULL) < 0) {
+ error("select failed, resuming: %s", strerror(errno));
+ sleep(1);
+ continue;
+ }
+
+ for (i = 0; i < socknum; i++) {
+ int sockfd = socklist[i];
+
+ if (FD_ISSET(sockfd, &fds)) {
+ struct sockaddr_storage ss;
+ socklen_t sslen = sizeof(ss);
+ int incoming = accept(sockfd, (struct sockaddr *)&ss, &sslen);
+ if (incoming < 0) {
+ switch (errno) {
+ case EAGAIN:
+ case EINTR:
+ case ECONNABORTED:
+ continue;
+ default:
+ die("accept returned %s", strerror(errno));
+ }
+ }
+ handle(incoming, (struct sockaddr *)&ss, sslen);
+ }
+ }
+ }
+}
+
+int main(int argc, char **argv)
+{
+ int port = DEFAULT_GIT_PORT;
+ int inetd_mode = 0;
+ int i;
+
+ for (i = 1; i < argc; i++) {
+ char *arg = argv[i];
+
+ if (!strncmp(arg, "--port=", 7)) {
+ char *end;
+ unsigned long n;
+ n = strtoul(arg+7, &end, 0);
+ if (arg[7] && !*end) {
+ port = n;
+ continue;
+ }
+ }
+
+ if (!strcmp(arg, "--inetd")) {
+ inetd_mode = 1;
+ continue;
+ }
+
+ usage(daemon_usage);
+ }
+
+ if (inetd_mode)
+ return execute();
+
+ return serve(port);
+}
diff --git a/date.c b/date.c
new file mode 100644
index 0000000..b46f2ce
--- /dev/null
+++ b/date.c
@@ -0,0 +1,459 @@
+/*
+ * GIT - The information manager from hell
+ *
+ * Copyright (C) Linus Torvalds, 2005
+ */
+
+#include <ctype.h>
+#include <time.h>
+
+#include "cache.h"
+
+static time_t my_mktime(struct tm *tm)
+{ /* Seconds since 1970 for the fields of "tm", with no timezone applied; -1 if out of range */
+ static const int mdays[] = { /* days of a non-leap year that precede each month */
+ 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334
+ };
+ int year = tm->tm_year - 70; /* years since 1970 */
+ int month = tm->tm_mon;
+ int day = tm->tm_mday;
+
+ if (year < 0 || year > 129) /* algo only works for 1970-2099 */
+ return -1;
+ if (month < 0 || month > 11) /* array bounds */
+ return -1;
+ if (month < 2 || (year + 2) % 4) /* 1-based mday -> elapsed days, except after February of a leap year */
+ day--;
+ return (year * 365 + (year + 1) / 4 + mdays[month] + day) * 24*60*60UL +
+ tm->tm_hour * 60*60 + tm->tm_min * 60 + tm->tm_sec;
+}
+
+static const char *month_names[] = { /* indexed by tm_mon, 0 = January */
+ "January", "February", "March", "April", "May", "June",
+ "July", "August", "September", "October", "November", "December"
+};
+
+static const char *weekday_names[] = { /* indexed by tm_wday, 0 = Sunday */
+ "Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday"
+};
+
+/*
+ * Format "time" (seconds since the epoch) for display in the zone "tz".
+ * "tz" is the strange "decimal parse of tz": -0100 is passed in as the
+ * integer -100, even though it means "sixty minutes off".
+ */
+const char *show_date(unsigned long time, int tz)
+{
+ struct tm *tm;
+ time_t t;
+ static char timebuf[200]; /* returned to the caller; overwritten by the next call */
+ int minutes;
+
+ minutes = tz < 0 ? -tz : tz;
+ minutes = (minutes / 100)*60 + (minutes % 100); /* hhmm -> minutes */
+ minutes = tz < 0 ? -minutes : minutes;
+ t = time + minutes * 60; /* shift so that gmtime() yields the wall clock of "tz" */
+ tm = gmtime(&t);
+ if (!tm)
+ return NULL;
+ sprintf(timebuf, "%.3s %.3s %d %02d:%02d:%02d %d %+05d",
+ weekday_names[tm->tm_wday],
+ month_names[tm->tm_mon],
+ tm->tm_mday,
+ tm->tm_hour, tm->tm_min, tm->tm_sec,
+ tm->tm_year + 1900, tz);
+ return timebuf;
+}
+
+/*
+ * Timezone abbreviations: "offset" is whole hours from UTC, and "dst" is
+ * an extra hour that match_alpha() adds for the summer-time names.
+ *
+ * Check these: several are historical and may still be a bit off.
+ */
+static const struct {
+ const char *name;
+ int offset;
+ int dst;
+} timezone_names[] = {
+ { "IDLW", -12, 0, }, /* International Date Line West */
+ { "NT", -11, 0, }, /* Nome */
+ { "CAT", -10, 0, }, /* Central Alaska */
+ { "HST", -10, 0, }, /* Hawaii Standard */
+ { "HDT", -10, 1, }, /* Hawaii Daylight */
+ { "YST", -9, 0, }, /* Yukon Standard */
+ { "YDT", -9, 1, }, /* Yukon Daylight */
+ { "PST", -8, 0, }, /* Pacific Standard */
+ { "PDT", -8, 1, }, /* Pacific Daylight */
+ { "MST", -7, 0, }, /* Mountain Standard */
+ { "MDT", -7, 1, }, /* Mountain Daylight */
+ { "CST", -6, 0, }, /* Central Standard */
+ { "CDT", -6, 1, }, /* Central Daylight */
+ { "EST", -5, 0, }, /* Eastern Standard */
+ { "EDT", -5, 1, }, /* Eastern Daylight */
+ { "AST", -4, 0, }, /* Atlantic Standard (UTC-4) */
+ { "ADT", -4, 1, }, /* Atlantic Daylight (UTC-3) */
+ { "WAT", -1, 0, }, /* West Africa */
+
+ { "GMT", 0, 0, }, /* Greenwich Mean */
+ { "UTC", 0, 0, }, /* Universal (Coordinated) */
+
+ { "WET", 0, 0, }, /* Western European */
+ { "BST", 0, 1, }, /* British Summer */
+ { "CET", +1, 0, }, /* Central European */
+ { "MET", +1, 0, }, /* Middle European */
+ { "MEWT", +1, 0, }, /* Middle European Winter */
+ { "MEST", +1, 1, }, /* Middle European Summer */
+ { "CEST", +1, 1, }, /* Central European Summer */
+ { "MESZ", +1, 1, }, /* Middle European Summer */
+ { "FWT", +1, 0, }, /* French Winter */
+ { "FST", +1, 1, }, /* French Summer */
+ { "EET", +2, 0, }, /* Eastern Europe, USSR Zone 1 */
+ { "EEST", +2, 1, }, /* Eastern European Daylight */
+ { "WAST", +7, 0, }, /* West Australian Standard */
+ { "WADT", +7, 1, }, /* West Australian Daylight */
+ { "CCT", +8, 0, }, /* China Coast, USSR Zone 7 */
+ { "JST", +9, 0, }, /* Japan Standard, USSR Zone 8 */
+ { "EAST", +10, 0, }, /* Eastern Australian Standard */
+ { "EADT", +10, 1, }, /* Eastern Australian Daylight */
+ { "GST", +10, 0, }, /* Guam Standard, USSR Zone 9 */
+ { "NZT", +12, 0, }, /* New Zealand (UTC+12) */
+ { "NZST", +12, 0, }, /* New Zealand Standard (UTC+12) */
+ { "NZDT", +12, 1, }, /* New Zealand Daylight (UTC+13) */
+ { "IDLE", +12, 0, }, /* International Date Line East */
+};
+
+#define NR_TZ (sizeof(timezone_names) / sizeof(timezone_names[0]))
+
+static int match_string(const char *date, const char *str)
+{ /* Number of leading chars of "date" that match "str" ignoring case; 0 on a mismatch inside a word */
+ int i = 0;
+
+ for (i = 0; *date; date++, str++, i++) {
+ if (*date == *str)
+ continue;
+ if (toupper((unsigned char)*date) == toupper((unsigned char)*str)) /* ctype needs unsigned char values */
+ continue;
+ if (!isalnum((unsigned char)*date)) /* the word in "date" ended: accept what matched so far */
+ break;
+ return 0;
+ }
+ return i;
+}
+
+static int skip_alpha(const char *date)
+{ /* Length of the alphabetic run at "date"; the first char is always counted, so the result is >= 1 */
+ int i = 0;
+ do {
+ i++;
+ } while (isalpha((unsigned char)date[i])); /* ctype needs unsigned char values */
+ return i;
+}
+
+/*
+* Parse month, weekday, timezone name or AM/PM; returns characters consumed
+*/
+static int match_alpha(const char *date, struct tm *tm, int *offset)
+{
+ int i;
+
+ for (i = 0; i < 12; i++) {
+ int match = match_string(date, month_names[i]);
+ if (match >= 3) { /* at least a three-letter abbreviation */
+ tm->tm_mon = i;
+ return match;
+ }
+ }
+
+ for (i = 0; i < 7; i++) {
+ int match = match_string(date, weekday_names[i]);
+ if (match >= 3) {
+ tm->tm_wday = i;
+ return match;
+ }
+ }
+
+ for (i = 0; i < NR_TZ; i++) {
+ int match = match_string(date, timezone_names[i].name);
+ if (match >= 3) {
+ int off = timezone_names[i].offset;
+ /* This is bogus, but we like summer */
+ off += timezone_names[i].dst;
+ /* Only use the tz name offset if we don't have anything better */
+ if (*offset == -1)
+ *offset = 60*off;
+ return match;
+ }
+ }
+
+ if (match_string(date, "PM") == 2) { /* only adjusts an hour that was parsed before the marker */
+ if (tm->tm_hour > 0 && tm->tm_hour < 12)
+ tm->tm_hour += 12;
+ return 2;
+ }
+ if (match_string(date, "AM") == 2) { /* 12:xx AM is the hour after midnight */
+ if (tm->tm_hour == 12)
+ tm->tm_hour = 0;
+ return 2;
+ }
+ return skip_alpha(date); /* BAD CRAP: skip the unrecognised word */
+}
+
+static int is_date(int year, int month, int day, struct tm *tm)
+{ /* Store month/day (and year, unless it is -1) in "tm" if plausible; returns 1 if stored, else 0 */
+ if (month > 0 && month < 13 && day > 0 && day < 32) {
+ if (year == -1) { /* no year given: leave tm_year alone */
+ tm->tm_mon = month-1;
+ tm->tm_mday = day;
+ return 1;
+ }
+ if (year >= 1970 && year < 2100) {
+ year -= 1900;
+ } else if (year >= 70 && year < 100) {
+ /* ok: two-digit 19xx year, 70 (1970) included, already counts from 1900 */
+ } else if (year < 38) { /* two-digit 20xx year */
+ year += 100;
+ } else
+ return 0;
+
+ tm->tm_mon = month-1;
+ tm->tm_mday = day;
+ tm->tm_year = year;
+ return 1;
+ }
+ return 0;
+}
+
+static int match_multi_number(unsigned long num, char c, const char *date, char *end, struct tm *tm)
+{ /* Parse num<c>num2[<c>num3] as a time (':') or a date ('-', '/'); returns chars consumed, or 0 */
+ long num2, num3;
+
+ num2 = strtol(end+1, &end, 10); /* "end" points at the first separator on entry */
+ num3 = -1; /* -1: no third number present */
+ if (*end == c && isdigit(end[1]))
+ num3 = strtol(end+1, &end, 10);
+
+ /* Time? Date? */
+ switch (c) {
+ case ':':
+ if (num3 < 0) /* hh:mm without seconds */
+ num3 = 0;
+ if (num < 25 && num2 >= 0 && num2 < 60 && num3 >= 0 && num3 <= 60) {
+ tm->tm_hour = num;
+ tm->tm_min = num2;
+ tm->tm_sec = num3;
+ break;
+ }
+ return 0;
+
+ case '-':
+ case '/':
+ if (num > 70) { /* the first number is too large for a day or month: try it as the year */
+ /* yyyy-mm-dd? */
+ if (is_date(num, num2, num3, tm))
+ break;
+ /* yyyy-dd-mm? */
+ if (is_date(num, num3, num2, tm))
+ break;
+ }
+ /* mm/dd/yy ? */
+ if (is_date(num3, num2, num, tm))
+ break;
+ /* dd/mm/yy ? */
+ if (is_date(num3, num, num2, tm))
+ break;
+ return 0;
+ }
+ return end - date; /* length of everything parsed, counted from the first digit */
+}
+
+/*
+ * We've seen a digit. Time? Year? Date? Returns the characters consumed.
+ */
+static int match_digit(const char *date, struct tm *tm, int *offset, int *tm_gmt)
+{
+ int n;
+ char *end;
+ unsigned long num;
+
+ num = strtoul(date, &end, 10);
+
+ /*
+ * Seconds since 1970? We trigger on that for anything after Jan 1, 2000
+ */
+ if (num > 946684800) {
+ time_t time = num;
+ if (gmtime_r(&time, tm)) {
+ *tm_gmt = 1; /* tm now holds UTC: parse_date() must not apply the offset */
+ return end - date;
+ }
+ }
+
+ /*
+ * Check for special formats: num[:-/]num[same]num
+ */
+ switch (*end) {
+ case ':':
+ case '/':
+ case '-':
+ if (isdigit(end[1])) {
+ int match = match_multi_number(num, *end, date, end, tm);
+ if (match)
+ return match;
+ }
+ }
+
+ /*
+ * None of the special formats? Try to guess what
+ * the number meant. We use the number of digits
+ * to make a more educated guess..
+ */
+ n = 0;
+ do {
+ n++;
+ } while (isdigit(date[n]));
+
+ /* Four-digit year or a timezone? */
+ if (n == 4) {
+ if (num <= 1200 && *offset == -1) { /* unsigned hhmm, taken as a positive offset in minutes */
+ unsigned int minutes = num % 100;
+ unsigned int hours = num / 100;
+ *offset = hours*60 + minutes;
+ } else if (num > 1900 && num < 2100)
+ tm->tm_year = num - 1900;
+ return n;
+ }
+
+ /*
+ * NOTE! We will give precedence to day-of-month over month or
+ * year numbers in the 1-12 range. So 05 is always "mday 5",
+ * unless we already have a mday..
+ *
+ * IOW, 01 Apr 05 parses as "April 1st, 2005".
+ */
+ if (num > 0 && num < 32 && tm->tm_mday < 0) {
+ tm->tm_mday = num;
+ return n;
+ }
+
+ /* Two-digit year? */
+ if (n == 2 && tm->tm_year < 0) {
+ if (num < 10 && tm->tm_mday >= 0) { /* 00-09 once the day is known: 2000-2009 */
+ tm->tm_year = num + 100;
+ return n;
+ }
+ if (num >= 70) { /* 70-99: 1970-1999 */
+ tm->tm_year = num;
+ return n;
+ }
+ }
+
+ if (num > 0 && num < 32) { /* last resort: overwrite a day that was already set */
+ tm->tm_mday = num;
+ } else if (num > 1900) {
+ tm->tm_year = num - 1900;
+ } else if (num > 70) {
+ tm->tm_year = num;
+ } else if (num > 0 && num < 13) { /* never reached: 1..12 is already caught by the "< 32" test */
+ tm->tm_mon = num-1;
+ }
+
+ return n;
+}
+
+static int match_tz(const char *date, int *offp)
+{ /* Parse a signed numeric zone such as "-0400" at "date"; returns the characters consumed */
+ char *end;
+ int offset = strtoul(date+1, &end, 10); /* date[0] is the '+' or '-' sign */
+ int min, hour;
+ int n = end - date - 1; /* number of digits parsed */
+
+ min = offset % 100;
+ hour = offset / 100;
+
+ /*
+ * Don't accept any random crap.. At least 3 digits, and
+ * a valid minute. We might want to check that the minutes
+ * are divisible by 30 or something too.
+ */
+ if (min < 60 && n > 2) {
+ offset = hour*60+min; /* hhmm -> minutes */
+ if (*date == '-')
+ offset = -offset;
+
+ *offp = offset;
+ } /* otherwise *offp is left untouched, but the digits are still consumed */
+ return end - date;
+}
+
+/* Gr. strptime is crap for this; it doesn't have a way to require RFC2822
+ (i.e. English) day/month names, and it doesn't work correctly with %z. */
+void parse_date(const char *date, char *result, int maxlen)
+{ /* Parse a free-form date and write "<seconds> <+|->hhmm" into result; result is untouched on failure */
+ struct tm tm;
+ int offset, sign, tm_gmt;
+ time_t then;
+
+ memset(&tm, 0, sizeof(tm));
+ tm.tm_year = -1; /* -1 marks "not seen yet" for the matchers */
+ tm.tm_mon = -1;
+ tm.tm_mday = -1;
+ tm.tm_isdst = -1;
+ offset = -1; /* minutes from UTC; -1 means no timezone seen */
+ tm_gmt = 0;
+
+ for (;;) {
+ int match = 0;
+ unsigned char c = *date;
+
+ /* Stop at end of string or newline */
+ if (!c || c == '\n')
+ break;
+
+ if (isalpha(c))
+ match = match_alpha(date, &tm, &offset);
+ else if (isdigit(c))
+ match = match_digit(date, &tm, &offset, &tm_gmt);
+ else if ((c == '-' || c == '+') && isdigit(date[1]))
+ match = match_tz(date, &offset);
+
+ if (!match) {
+ /* BAD CRAP */
+ match = 1; /* skip one unrecognised character */
+ }
+
+ date += match;
+ }
+
+ /* mktime uses local timezone */
+ then = my_mktime(&tm);
+ if (offset == -1) /* no zone in the input: derive the offset from the local timezone */
+ offset = (then - mktime(&tm)) / 60;
+
+ if (then == -1) /* my_mktime() rejected the year or month */
+ return;
+
+ if (!tm_gmt) /* tm held wall-clock time: convert to UTC */
+ then -= offset * 60;
+
+ sign = '+';
+ if (offset < 0) {
+ offset = -offset;
+ sign = '-';
+ }
+
+ snprintf(result, maxlen, "%lu %c%02d%02d", then, sign, offset/60, offset % 60);
+}
+
+void datestamp(char *buf, int bufsize)
+{ /* Write the current time as "<seconds since epoch> <+|->hhmm" (local zone offset) into buf */
+ time_t now;
+ int offset;
+
+ time(&now);
+
+ offset = my_mktime(localtime(&now)) - now; /* local wall clock read as if UTC, minus real UTC: seconds */
+ offset /= 60; /* seconds -> minutes */
+
+ snprintf(buf, bufsize, "%lu %+05d", now, offset/60*100 + offset%60); /* minutes -> decimal hhmm */
+}
diff --git a/debian/changelog b/debian/changelog
new file mode 100644
index 0000000..1e62b78
--- /dev/null
+++ b/debian/changelog
@@ -0,0 +1,21 @@
+git-core (0.99-2) unstable; urgency=low
+
+ * Conflict with the GNU Interactive Tools package, which also installs
+ /usr/bin/git.
+ * Use the Mozilla SHA1 code and/or the PPC assembly in preference to
+ OpenSSL. This is only a partial fix for the license issues with OpenSSL.
+ * Minor tweaks to the Depends.
+
+ -- Ryan Anderson <ryan@michonline.com> Sat, 23 Jul 2005 14:15:00 -0400
+
+git-core (0.99-1) unstable; urgency=low
+
+ * Update deb package support to build correctly.
+
+ -- Ryan Anderson <ryan@michonline.com> Thu, 21 Jul 2005 02:03:32 -0400
+
+git-core (0.99-0) unstable; urgency=low
+
+ * Initial deb package support
+
+ -- Eric Biederman <ebiederm@xmission.com> Tue, 12 Jul 2005 10:57:51 -0600
diff --git a/debian/compat b/debian/compat
new file mode 100644
index 0000000..b8626c4
--- /dev/null
+++ b/debian/compat
@@ -0,0 +1 @@
+4
diff --git a/debian/control b/debian/control
new file mode 100644
index 0000000..98c81c7
--- /dev/null
+++ b/debian/control
@@ -0,0 +1,19 @@
+Source: git-core
+Section: devel
+Priority: optional
+Maintainer: Linus Torvalds <torvalds@osdl.org>
+Build-Depends-Indep: libz-dev, libssl-dev, libcurl3-dev, asciidoc > 6.0.3, xmlto, debhelper (>= 4.0.0)
+Standards-Version: 3.6.1
+
+Package: git-core
+Architecture: any
+Depends: ${shlibs:Depends}, ${misc:Depends}, patch, diff, rcs
+Recommends: rsync, curl, ssh
+Conflicts: git
+Description: The git content addressable filesystem
+ GIT comes in two layers. The bottom layer is merely an extremely fast
+ and flexible filesystem-based database designed to store directory trees
+ with regard to their history. The top layer is a SCM-like tool which
+ enables human beings to work with the database in a manner to a degree
+ similar to other SCM tools (like CVS, BitKeeper or Monotone).
+
diff --git a/debian/copyright b/debian/copyright
new file mode 100644
index 0000000..32b7e9c
--- /dev/null
+++ b/debian/copyright
@@ -0,0 +1,3 @@
+License:
+
+GPL v2 (see COPYING for details)
diff --git a/debian/docs b/debian/docs
new file mode 100644
index 0000000..a252d0f
--- /dev/null
+++ b/debian/docs
@@ -0,0 +1,3 @@
+README
+COPYING
+
diff --git a/debian/git-core.doc-base b/debian/git-core.doc-base
new file mode 100644
index 0000000..e104671
--- /dev/null
+++ b/debian/git-core.doc-base
@@ -0,0 +1,12 @@
+Document: git-core
+Title: git-core
+Author:
+Abstract: This manual describes git
+Section: Devel
+
+Format: HTML
+Index: /usr/share/doc/git-core/html/git.html
+Files: /usr/share/doc/git-core/html/*.html
+
+Format: text
+Files: /usr/share/doc/git-core/git-core.txt
diff --git a/debian/git-core.install b/debian/git-core.install
new file mode 100644
index 0000000..72e8ffc
--- /dev/null
+++ b/debian/git-core.install
@@ -0,0 +1 @@
+*
diff --git a/debian/rules b/debian/rules
new file mode 100755
index 0000000..67830b0
--- /dev/null
+++ b/debian/rules
@@ -0,0 +1,95 @@
+#!/usr/bin/make -f
+# -*- makefile -*-
+
+# Uncomment this to turn on verbose mode.
+#export DH_VERBOSE=1
+
+CFLAGS = -g -Wall
+ifneq (,$(findstring noopt,$(DEB_BUILD_OPTIONS)))
+ CFLAGS += -O0
+else
+ CFLAGS += -O2
+endif
+export CFLAGS
+
+#
+# On PowerPC we compile against the hand-crafted assembly, on all
+# other architectures we compile against GPL'ed sha1 code lifted
+# from Mozilla. OpenSSL is strangely licensed and best avoided
+# in Debian.
+#
+HOST_ARCH=$(shell dpkg-architecture -qDEB_HOST_ARCH)
+ifeq (${HOST_ARCH},powerpc)
+ export PPC_SHA1=YesPlease
+else
+ export MOZILLA_SHA1=YesPlease
+endif
+
+# Installation layout used by the install target below.
+PREFIX := /usr
+MANDIR := /usr/share/man/
+
+SRC := ./
+DOC := Documentation/
+DESTDIR := $(CURDIR)/debian/tmp
+DOC_DESTDIR := $(DESTDIR)/usr/share/doc/git-core/
+MAN_DESTDIR := $(DESTDIR)/$(MANDIR)
+
+build: debian/build-stamp
+debian/build-stamp:
+ dh_testdir
+ $(MAKE) all doc
+ touch debian/build-stamp
+
+debian-clean:
+ dh_testdir
+ dh_testroot
+ rm -f debian/build-stamp
+ dh_clean
+
+clean: debian-clean
+ $(MAKE) clean
+
+install: build
+ dh_testdir
+ dh_testroot
+ dh_clean -k
+ dh_installdirs
+
+ $(MAKE) dest=$(DESTDIR) prefix=$(PREFIX) mandir=$(MANDIR) install install-doc
+
+ mkdir -p $(DOC_DESTDIR)
+ find $(DOC) '(' -name '*.txt' -o -name '*.html' ')' -exec install {} $(DOC_DESTDIR) ';'
+
+ dh_install --list-missing --sourcedir=$(DESTDIR)
+
+binary: build install
+ dh_testdir
+ dh_testroot
+ dh_installchangelogs
+ dh_installdocs
+ dh_installexamples
+# dh_installmenu
+# dh_installdebconf
+# dh_installlogrotate
+# dh_installemacsen
+# dh_installpam
+# dh_installmime
+# dh_installinit
+# dh_installcron
+# dh_installinfo
+ dh_installman
+ dh_link
+ dh_strip
+ dh_compress
+ dh_fixperms
+# dh_perl
+# dh_python
+ dh_makeshlibs
+ dh_installdeb
+ dh_shlibdeps
+ dh_gencontrol
+ dh_md5sums
+ dh_builddeb
+
+.PHONY: build clean binary install debian-clean
diff --git a/delta.h b/delta.h
new file mode 100644
index 0000000..31d1820
--- /dev/null
+++ b/delta.h
@@ -0,0 +1,34 @@
+#ifndef DELTA_H
+#define DELTA_H
+
+/* handling of delta buffers */
+extern void *diff_delta(void *from_buf, unsigned long from_size,
+ void *to_buf, unsigned long to_size,
+ unsigned long *delta_size, unsigned long max_size);
+extern void *patch_delta(void *src_buf, unsigned long src_size,
+ void *delta_buf, unsigned long delta_size,
+ unsigned long *dst_size);
+
+/* the smallest possible delta size is 4 bytes */
+#define DELTA_SIZE_MIN 4
+
+/*
+ * Decode one base-128 size (low 7 bits first, bit 7 = "more") and advance
+ * *datap. Call twice: first the reference buffer size, then the result size.
+ */
+static inline unsigned long get_delta_hdr_size(const unsigned char **datap)
+{
+ const unsigned char *data = *datap;
+ unsigned char cmd = *data++;
+ unsigned long size = cmd & ~0x80;
+ int i = 7;
+ while (cmd & 0x80) { /* NOTE(review): no bound on reads or on i; the header must be well-formed */
+ cmd = *data++;
+ size |= (unsigned long)(cmd & ~0x80) << i; /* widen first: an int shift overflows from i == 28 on */
+ i += 7;
+ }
+ *datap = data;
+ return size;
+}
+
+#endif
diff --git a/diff-cache.c b/diff-cache.c
new file mode 100644
index 0000000..e3c4c81
--- /dev/null
+++ b/diff-cache.c
@@ -0,0 +1,294 @@
+#include "cache.h"
+#include "diff.h"
+
+static int cached_only = 0;
+static int diff_output_format = DIFF_FORMAT_RAW;
+static int diff_line_termination = '\n';
+static int match_nonexisting = 0;
+static int detect_rename = 0;
+static int find_copies_harder = 0;
+static int diff_setup_opt = 0;
+static int diff_score_opt = 0;
+static const char *pickaxe = NULL;
+static int pickaxe_opts = 0;
+static int diff_break_opt = -1;
+static const char *orderfile = NULL;
+static const char *diff_filter = NULL;
+
+/* A file entry went away or appeared; prefix[0] ('+' or '-' at the call sites) says which */
+static void show_file(const char *prefix, struct cache_entry *ce, unsigned char *sha1, unsigned int mode)
+{
+ diff_addremove(prefix[0], ntohl(mode), sha1, ce->name, NULL); /* callers pass mode as stored in ce_mode */
+}
+
+static int get_stat_data(struct cache_entry *ce, unsigned char **sha1p, unsigned int *modep)
+{ /* Yield the sha1/mode to compare for "ce": index values, or work-tree values if it changed; -1 on lstat failure */
+ unsigned char *sha1 = ce->sha1;
+ unsigned int mode = ce->ce_mode;
+
+ if (!cached_only) {
+ static unsigned char no_sha1[20]; /* static, hence all zero: stands for unknown work-tree content */
+ int changed;
+ struct stat st;
+ if (lstat(ce->name, &st) < 0) {
+ if (errno == ENOENT && match_nonexisting) { /* -m: treat a missing file as matching the index */
+ *sha1p = sha1;
+ *modep = mode;
+ return 0;
+ }
+ return -1;
+ }
+ changed = ce_match_stat(ce, &st);
+ if (changed) {
+ mode = create_ce_mode(st.st_mode);
+ sha1 = no_sha1;
+ }
+ }
+
+ *sha1p = sha1;
+ *modep = mode;
+ return 0;
+}
+
+static void show_new_file(struct cache_entry *new)
+{ /* Report "new" as an added file, unless its stat data cannot be obtained */
+ unsigned char *sha1;
+ unsigned int mode;
+
+ /* New file in the index: it might actually be different in the working copy */
+ if (get_stat_data(new, &sha1, &mode) < 0)
+ return;
+
+ show_file("+", new, sha1, mode);
+}
+
+static int show_modified(struct cache_entry *old,
+ struct cache_entry *new,
+ int report_missing)
+{ /* Queue a change from "old" to "new"; -1 if stat data for "new" is unavailable, else 0 */
+ unsigned int mode, oldmode;
+ unsigned char *sha1;
+
+ if (get_stat_data(new, &sha1, &mode) < 0) {
+ if (report_missing) /* the caller wants the missing file shown as a removal of "old" */
+ show_file("-", old, old->sha1, old->ce_mode);
+ return -1;
+ }
+
+ oldmode = old->ce_mode;
+ if (mode == oldmode && !memcmp(sha1, old->sha1, 20) &&
+ !find_copies_harder) /* unchanged entries are still queued under --find-copies-harder */
+ return 0;
+
+ mode = ntohl(mode);
+ oldmode = ntohl(oldmode);
+
+ diff_change(oldmode, mode,
+ old->sha1, sha1, old->name, NULL);
+ return 0;
+}
+
+static int diff_cache(struct cache_entry **ac, int entries, const char **pathspec)
+{ /* Walk the cache entries, handling all stages of one path per iteration; always returns 0 */
+ while (entries) {
+ struct cache_entry *ce = *ac;
+ int same = (entries > 1) && ce_same_name(ce, ac[1]); /* does the next entry share this path? */
+
+ if (!ce_path_match(ce, pathspec))
+ goto skip_entry;
+
+ switch (ce_stage(ce)) {
+ case 0:
+ /* No stage 1 entry? That means it's a new file */
+ if (!same) {
+ show_new_file(ce);
+ break;
+ }
+ /* Show difference between old and new */
+ show_modified(ac[1], ce, 1);
+ break;
+ case 1:
+ /* No stage 3 (merge) entry? That means it's been deleted */
+ if (!same) {
+ show_file("-", ce, ce->sha1, ce->ce_mode);
+ break;
+ }
+ /* We come here with ce pointing at stage 1
+ * (original tree) and ac[1] pointing at stage
+ * 3 (unmerged). show-modified with
+ * report-missing set to false does not say the
+ * file is deleted but reports true if work
+ * tree does not have it, in which case we
+ * fall through to report the unmerged state.
+ * Otherwise, we show the differences between
+ * the original tree and the work tree.
+ */
+ if (!cached_only && !show_modified(ce, ac[1], 0))
+ break;
+ /* fallthru */
+ case 3:
+ diff_unmerge(ce->name);
+ break;
+
+ default:
+ die("impossible cache entry stage");
+ }
+
+skip_entry:
+ /*
+ * Ignore all the different stages for this file,
+ * we've handled the relevant cases now.
+ */
+ do {
+ ac++;
+ entries--;
+ } while (entries && ce_same_name(ce, ac[0]));
+ }
+ return 0;
+}
+
+/*
+ * This turns all merge entries into "stage 3". That guarantees that
+ * when we read in the new tree (into "stage 1"), we won't lose sight
+ * of the fact that we had unmerged entries.
+ */
+static void mark_merge_entries(void)
+{
+ int i;
+ for (i = 0; i < active_nr; i++) {
+ struct cache_entry *ce = active_cache[i];
+ if (!ce_stage(ce)) /* stage 0 entries are merged: leave them alone */
+ continue;
+ ce->ce_flags |= htons(CE_STAGEMASK); /* set every stage bit */
+ }
+}
+
+static char *diff_cache_usage =
+"git-diff-cache [-m] [--cached] "
+"[<common diff options>] <tree-ish> [<path>...]"
+COMMON_DIFF_OPTIONS_HELP;
+
+int main(int argc, const char **argv)
+{ /* git-diff-cache: compare a tree against the index (--cached) or the index plus work tree */
+ const char *tree_name = NULL;
+ unsigned char sha1[20];
+ const char **pathspec = NULL;
+ void *tree;
+ unsigned long size;
+ int ret;
+ int allow_options = 1;
+ int i;
+
+ read_cache();
+ for (i = 1; i < argc; i++) {
+ const char *arg = argv[i];
+
+ if (!allow_options || *arg != '-') { /* first non-option is the tree, the rest are paths */
+ if (tree_name) {
+ pathspec = argv + i;
+ break;
+ }
+ tree_name = arg;
+ continue;
+ }
+
+ if (!strcmp(arg, "--")) {
+ allow_options = 0;
+ continue;
+ }
+ if (!strcmp(arg, "-r")) {
+ /* We accept the -r flag just to look like git-diff-tree */
+ continue;
+ }
+ /* We accept the -u flag as a synonym for "-p" */
+ if (!strcmp(arg, "-p") || !strcmp(arg, "-u")) {
+ diff_output_format = DIFF_FORMAT_PATCH;
+ continue;
+ }
+ if (!strncmp(arg, "-B", 2)) {
+ if ((diff_break_opt = diff_scoreopt_parse(arg)) == -1)
+ usage(diff_cache_usage);
+ continue;
+ }
+ if (!strncmp(arg, "-M", 2)) {
+ detect_rename = DIFF_DETECT_RENAME;
+ if ((diff_score_opt = diff_scoreopt_parse(arg)) == -1)
+ usage(diff_cache_usage);
+ continue;
+ }
+ if (!strncmp(arg, "-C", 2)) {
+ detect_rename = DIFF_DETECT_COPY;
+ if ((diff_score_opt = diff_scoreopt_parse(arg)) == -1)
+ usage(diff_cache_usage);
+ continue;
+ }
+ if (!strcmp(arg, "--find-copies-harder")) {
+ find_copies_harder = 1;
+ continue;
+ }
+ if (!strcmp(arg, "-z")) {
+ diff_line_termination = 0; /* NUL instead of newline */
+ continue;
+ }
+ if (!strcmp(arg, "--name-only")) {
+ diff_output_format = DIFF_FORMAT_NAME;
+ continue;
+ }
+ if (!strcmp(arg, "-R")) {
+ diff_setup_opt |= DIFF_SETUP_REVERSE;
+ continue;
+ }
+ if (!strncmp(arg, "-S", 2)) {
+ pickaxe = arg + 2; /* the string follows the flag directly */
+ continue;
+ }
+ if (!strncmp(arg, "--diff-filter=", 14)) {
+ diff_filter = arg + 14;
+ continue;
+ }
+ if (!strncmp(arg, "-O", 2)) {
+ orderfile = arg + 2;
+ continue;
+ }
+ if (!strcmp(arg, "--pickaxe-all")) {
+ pickaxe_opts = DIFF_PICKAXE_ALL;
+ continue;
+ }
+ if (!strcmp(arg, "-m")) {
+ match_nonexisting = 1;
+ continue;
+ }
+ if (!strcmp(arg, "--cached")) {
+ cached_only = 1;
+ continue;
+ }
+ usage(diff_cache_usage);
+ }
+
+ if (find_copies_harder && detect_rename != DIFF_DETECT_COPY) /* --find-copies-harder requires -C */
+ usage(diff_cache_usage);
+
+ if (!tree_name || get_sha1(tree_name, sha1))
+ usage(diff_cache_usage);
+
+ /* The rest is for paths restriction. */
+ diff_setup(diff_setup_opt);
+
+ mark_merge_entries();
+
+ tree = read_object_with_reference(sha1, "tree", &size, NULL);
+ if (!tree)
+ die("bad tree object %s", tree_name);
+ if (read_tree(tree, size, 1, pathspec)) /* the tree is read in as stage 1 */
+ die("unable to read tree object %s", tree_name);
+
+ ret = diff_cache(active_cache, active_nr, pathspec);
+
+ diffcore_std(pathspec,
+ detect_rename, diff_score_opt,
+ pickaxe, pickaxe_opts,
+ diff_break_opt,
+ orderfile, diff_filter);
+ diff_flush(diff_output_format, diff_line_termination);
+ return ret;
+}
diff --git a/diff-delta.c b/diff-delta.c
new file mode 100644
index 0000000..b2ae7b5
--- /dev/null
+++ b/diff-delta.c
@@ -0,0 +1,334 @@
+/*
+ * diff-delta.c: generate a delta between two buffers
+ *
+ * Many parts of this file have been lifted from LibXDiff version 0.10.
+ * http://www.xmailserver.org/xdiff-lib.html
+ *
+ * LibXDiff was written by Davide Libenzi <davidel@xmailserver.org>
+ * Copyright (C) 2003 Davide Libenzi
+ *
+ * Many mods for GIT usage by Nicolas Pitre <nico@cam.org>, (C) 2005.
+ *
+ * This file is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Use of this within git automatically means that the LGPL
+ * licensing gets turned into GPLv2 within this project.
+ */
+
+#include <stdlib.h>
+#include "delta.h"
+
+
+/* block size: min = 16, max = 64k, power of 2 */
+#define BLK_SIZE 16
+
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
+
+#define GR_PRIME 0x9e370001
+#define HASH(v, b) (((unsigned int)(v) * GR_PRIME) >> (32 - (b)))
+
+/* largest prime smaller than 65536 */
+#define BASE 65521
+
+/* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */
+#define NMAX 5552
+
+#define DO1(buf, i) { s1 += buf[i]; s2 += s1; }
+#define DO2(buf, i) DO1(buf, i); DO1(buf, i + 1);
+#define DO4(buf, i) DO2(buf, i); DO2(buf, i + 2);
+#define DO8(buf, i) DO4(buf, i); DO4(buf, i + 4);
+#define DO16(buf) DO8(buf, 0); DO8(buf, 8);
+
+static unsigned int adler32(unsigned int adler, const unsigned char *buf, int len)
+{ /* Continue the Adler-32 checksum "adler" over len bytes of buf; returns (s2 << 16) | s1 */
+ int k;
+ unsigned int s1 = adler & 0xffff;
+ unsigned int s2 = adler >> 16;
+
+ while (len > 0) {
+ k = MIN(len, NMAX); /* at most NMAX bytes between modulo reductions (see NMAX) */
+ len -= k;
+ while (k >= 16) { /* unrolled: 16 bytes per round */
+ DO16(buf);
+ buf += 16;
+ k -= 16;
+ }
+ if (k != 0)
+ do {
+ s1 += *buf++;
+ s2 += s1;
+ } while (--k);
+ s1 %= BASE;
+ s2 %= BASE;
+ }
+
+ return (s2 << 16) | s1;
+}
+
+static unsigned int hashbits(unsigned int size)
+{ /* Smallest bit count b with (1 << b) >= size, but never less than 1 */
+ unsigned int val = 1, bits = 0;
+ while (val < size && bits < 32) {
+ val <<= 1;
+ bits++;
+ }
+ return bits ? bits: 1;
+}
+
+typedef struct s_chanode { /* header of one chunk; the item storage follows it in the same malloc */
+ struct s_chanode *next;
+ int icurr; /* bytes of this chunk's storage already handed out */
+} chanode_t;
+
+typedef struct s_chastore { /* allocator for fixed-size items, freed all at once by cha_free() */
+ chanode_t *head, *tail; /* list of every chunk allocated so far */
+ int isize, nsize; /* item size and storage bytes per chunk */
+ chanode_t *ancur; /* chunk that cha_alloc() currently carves items from */
+ chanode_t *sncur; /* only reset by cha_init() in the code visible here */
+ int scurr; /* only reset by cha_init() in the code visible here */
+} chastore_t;
+
+static void cha_init(chastore_t *cha, int isize, int icount)
+{ /* Set up an empty store handing out isize-byte items, icount items per chunk */
+ cha->head = cha->tail = NULL;
+ cha->isize = isize;
+ cha->nsize = icount * isize;
+ cha->ancur = cha->sncur = NULL;
+ cha->scurr = 0;
+}
+
+static void *cha_alloc(chastore_t *cha)
+{ /* Return storage for one item, allocating a new chunk when needed; NULL if malloc fails */
+ chanode_t *ancur;
+ void *data;
+
+ ancur = cha->ancur;
+ if (!ancur || ancur->icurr == cha->nsize) { /* no chunk yet, or the current one is full */
+ ancur = malloc(sizeof(chanode_t) + cha->nsize);
+ if (!ancur)
+ return NULL;
+ ancur->icurr = 0;
+ ancur->next = NULL;
+ if (cha->tail)
+ cha->tail->next = ancur;
+ if (!cha->head)
+ cha->head = ancur;
+ cha->tail = ancur;
+ cha->ancur = ancur;
+ }
+
+ data = (void *)ancur + sizeof(chanode_t) + ancur->icurr; /* void * arithmetic: a GNU C extension */
+ ancur->icurr += cha->isize;
+ return data;
+}
+
+static void cha_free(chastore_t *cha)
+{ /* Free every chunk of the store; the pointers inside *cha are left as they were */
+ chanode_t *cur = cha->head;
+ while (cur) {
+ chanode_t *tmp = cur;
+ cur = cur->next; /* read the link before the node is freed */
+ free(tmp);
+ }
+}
+
+typedef struct s_bdrecord { /* index entry for one block of the reference buffer */
+ struct s_bdrecord *next; /* next record in the same hash bucket */
+ unsigned int fp; /* adler32 of the block */
+ const unsigned char *ptr; /* start of the block inside the reference buffer */
+} bdrecord_t;
+
+typedef struct s_bdfile { /* block index over a reference buffer, built by delta_prepare() */
+ const unsigned char *data, *top; /* start of the buffer and one past its end */
+ chastore_t cha; /* storage for the records */
+ unsigned int fphbits; /* the table has 1 << fphbits buckets */
+ bdrecord_t **fphash; /* buckets, selected with HASH(fp, fphbits) */
+} bdfile_t;
+
+static int delta_prepare(const unsigned char *buf, int bufsize, bdfile_t *bdf)
+{ /* Index buf in BLK_SIZE blocks by adler32 fingerprint; 0 on success, -1 if allocation fails */
+ unsigned int fphbits;
+ int i, hsize;
+ const unsigned char *base, *data, *top;
+ bdrecord_t *brec;
+ bdrecord_t **fphash;
+
+ fphbits = hashbits(bufsize / BLK_SIZE + 1);
+ hsize = 1 << fphbits;
+ fphash = malloc(hsize * sizeof(bdrecord_t *));
+ if (!fphash)
+ return -1;
+ for (i = 0; i < hsize; i++)
+ fphash[i] = NULL;
+ cha_init(&bdf->cha, sizeof(bdrecord_t), hsize / 4 + 1);
+
+ bdf->data = data = base = buf;
+ bdf->top = top = buf + bufsize;
+ data += (bufsize / BLK_SIZE) * BLK_SIZE; /* start of the last (possibly partial) block */
+ if (data == top) /* size is a multiple of BLK_SIZE: step back to the last full block */
+ data -= BLK_SIZE;
+
+ for ( ; data >= base; data -= BLK_SIZE) { /* last block first, so earlier blocks end up at the bucket heads */
+ brec = cha_alloc(&bdf->cha);
+ if (!brec) {
+ cha_free(&bdf->cha);
+ free(fphash);
+ return -1;
+ }
+ brec->fp = adler32(0, data, MIN(BLK_SIZE, top - data));
+ brec->ptr = data;
+ i = HASH(brec->fp, fphbits);
+ brec->next = fphash[i];
+ fphash[i] = brec;
+ }
+
+ bdf->fphbits = fphbits;
+ bdf->fphash = fphash;
+
+ return 0;
+}
+
+static void delta_cleanup(bdfile_t *bdf)
+{ /* Release the hash table and all records of an index built by delta_prepare() */
+ free(bdf->fphash);
+ cha_free(&bdf->cha);
+}
+
+#define COPYOP_SIZE(o, s) \
+ (!!(o & 0xff) + !!(o & 0xff00) + !!(o & 0xff0000) + !!(o & 0xff000000) + \
+ !!(s & 0xff) + !!(s & 0xff00) + 1)
+
+void *diff_delta(void *from_buf, unsigned long from_size,
+ void *to_buf, unsigned long to_size,
+ unsigned long *delta_size,
+ unsigned long max_size)
+{ /* Build a malloc'ed delta turning from_buf into to_buf; NULL on empty input, no memory, or delta > max_size */
+ int i, outpos, outsize, inscnt, csize, msize, moff;
+ unsigned int fp;
+ const unsigned char *data, *top, *ptr1, *ptr2;
+ unsigned char *out, *orig;
+ bdrecord_t *brec;
+ bdfile_t bdf;
+
+ if (!from_size || !to_size || delta_prepare(from_buf, from_size, &bdf))
+ return NULL;
+
+ outpos = 0;
+ outsize = 8192;
+ out = malloc(outsize);
+ if (!out) {
+ delta_cleanup(&bdf);
+ return NULL;
+ }
+
+ data = to_buf;
+ top = to_buf + to_size;
+
+ /* store reference buffer size */
+ out[outpos++] = from_size; /* 7 bits per byte, low bits first; bit 7 marks "more follows" */
+ from_size >>= 7;
+ while (from_size) {
+ out[outpos - 1] |= 0x80;
+ out[outpos++] = from_size;
+ from_size >>= 7;
+ }
+
+ /* store target buffer size */
+ out[outpos++] = to_size;
+ to_size >>= 7;
+ while (to_size) {
+ out[outpos - 1] |= 0x80;
+ out[outpos++] = to_size;
+ to_size >>= 7;
+ }
+
+ inscnt = 0; /* literal bytes in the insert run currently being built */
+ moff = 0;
+ while (data < top) {
+ msize = 0;
+ fp = adler32(0, data, MIN(top - data, BLK_SIZE));
+ i = HASH(fp, bdf.fphbits);
+ for (brec = bdf.fphash[i]; brec; brec = brec->next) { /* keep the longest match among the candidates */
+ if (brec->fp == fp) {
+ csize = bdf.top - brec->ptr;
+ if (csize > top - data)
+ csize = top - data;
+ for (ptr1 = brec->ptr, ptr2 = data;
+ csize && *ptr1 == *ptr2;
+ csize--, ptr1++, ptr2++);
+
+ csize = ptr1 - brec->ptr; /* number of bytes that really matched */
+ if (csize > msize) {
+ moff = brec->ptr - bdf.data;
+ msize = csize;
+ if (msize >= 0x10000) { /* a copy op carries at most two size bytes */
+ msize = 0x10000;
+ break;
+ }
+ }
+ }
+ }
+
+ if (!msize || msize < COPYOP_SIZE(moff, msize)) { /* no match, or a copy op would not be shorter */
+ if (!inscnt)
+ outpos++; /* reserve the count byte of a new insert run */
+ out[outpos++] = *data++;
+ inscnt++;
+ if (inscnt == 0x7f) { /* run is full: the count byte must keep bit 7 clear */
+ out[outpos - inscnt - 1] = inscnt;
+ inscnt = 0;
+ }
+ } else {
+ if (inscnt) { /* close the pending insert run */
+ out[outpos - inscnt - 1] = inscnt;
+ inscnt = 0;
+ }
+
+ data += msize;
+ orig = out + outpos++; /* opcode byte, filled in once the flags are known */
+ i = 0x80;
+
+ if (moff & 0xff) { out[outpos++] = moff; i |= 0x01; } /* only non-zero offset/size bytes are emitted */
+ moff >>= 8;
+ if (moff & 0xff) { out[outpos++] = moff; i |= 0x02; }
+ moff >>= 8;
+ if (moff & 0xff) { out[outpos++] = moff; i |= 0x04; }
+ moff >>= 8;
+ if (moff & 0xff) { out[outpos++] = moff; i |= 0x08; }
+
+ if (msize & 0xff) { out[outpos++] = msize; i |= 0x10; }
+ msize >>= 8;
+ if (msize & 0xff) { out[outpos++] = msize; i |= 0x20; }
+
+ *orig = i;
+ }
+
+ if (max_size && outpos > max_size) { /* max_size == 0 means no limit */
+ free(out);
+ delta_cleanup(&bdf);
+ return NULL;
+ }
+
+ /* next time around the largest possible output is 1 + 4 + 3 */
+ if (outpos > outsize - 8) {
+ void *tmp = out;
+ outsize = outsize * 3 / 2;
+ out = realloc(out, outsize);
+ if (!out) {
+ free(tmp);
+ delta_cleanup(&bdf);
+ return NULL;
+ }
+ }
+ }
+
+ if (inscnt) /* close a trailing insert run */
+ out[outpos - inscnt - 1] = inscnt;
+
+ delta_cleanup(&bdf);
+ *delta_size = outpos;
+ return out;
+}
diff --git a/diff-files.c b/diff-files.c
new file mode 100644
index 0000000..39dd54f
--- /dev/null
+++ b/diff-files.c
@@ -0,0 +1,157 @@
+/*
+ * GIT - The information manager from hell
+ *
+ * Copyright (C) Linus Torvalds, 2005
+ */
+#include "cache.h"
+#include "diff.h"
+
+ /* Usage text handed to usage() when main() rejects the command line. */
+ static const char *diff_files_usage =
+ "git-diff-files [-q] "
+ "[<common diff options>] [<path>...]"
+ COMMON_DIFF_OPTIONS_HELP;
+
+ /* Option state; each variable is set in main() by the flag noted beside it. */
+ static int diff_output_format = DIFF_FORMAT_RAW; /* -p, -u, --name-only */
+ static int diff_line_termination = '\n'; /* -z sets this to 0 */
+ static int detect_rename = 0; /* -M, -C */
+ static int find_copies_harder = 0; /* --find-copies-harder */
+ static int diff_setup_opt = 0; /* -R */
+ static int diff_score_opt = 0; /* value parsed from -M or -C */
+ static const char *pickaxe = NULL; /* text following -S */
+ static int pickaxe_opts = 0; /* --pickaxe-all */
+ static int diff_break_opt = -1; /* value parsed from -B */
+ static const char *orderfile = NULL; /* text following -O */
+ static const char *diff_filter = NULL; /* text following --diff-filter= */
+ static int silent = 0; /* -q */
+
+ /* Hand an unmerged path to the diff machinery through diff_unmerge(). */
+ static void show_unmerge(const char *path)
+ {
+ diff_unmerge(path);
+ }
+
+ /*
+ * Pass cache entry ce to diff_addremove() with pfx as its first
+ * argument (main() uses '-' for a path missing from the work tree).
+ * ce_mode is run through ntohl() before being passed on.
+ */
+ static void show_file(int pfx, struct cache_entry *ce)
+ {
+ diff_addremove(pfx, ntohl(ce->ce_mode), ce->sha1, ce->name, NULL);
+ }
+
+ /*
+ * Pass an old/new pair for path to diff_change(); the final argument
+ * of diff_change() is always NULL here.
+ */
+ static void show_modified(int oldmode, int mode,
+ const unsigned char *old_sha1, const unsigned char *sha1,
+ char *path)
+ {
+ diff_change(oldmode, mode, old_sha1, sha1, path, NULL);
+ }
+
+ /*
+ * git-diff-files entry point. Parses the options, then for every cache
+ * entry matching the pathspec compares the cached information with
+ * lstat() of the same path and queues the result; finally runs
+ * diffcore_std() and diff_flush(). Returns 0; exits with status 1 when
+ * read_cache() returned a negative value.
+ */
+ int main(int argc, const char **argv)
+ {
+ static const unsigned char null_sha1[20] = { 0, };
+ const char **pathspec;
+ int entries = read_cache();
+ int i;
+
+ /* Consume leading arguments that start with '-'. */
+ while (1 < argc && argv[1][0] == '-') {
+ if (!strcmp(argv[1], "-p") || !strcmp(argv[1], "-u"))
+ diff_output_format = DIFF_FORMAT_PATCH;
+ else if (!strcmp(argv[1], "-q"))
+ silent = 1;
+ else if (!strcmp(argv[1], "-r"))
+ ; /* no-op */
+ else if (!strcmp(argv[1], "-s"))
+ ; /* no-op */
+ else if (!strcmp(argv[1], "-z"))
+ diff_line_termination = 0;
+ else if (!strcmp(argv[1], "--name-only"))
+ diff_output_format = DIFF_FORMAT_NAME;
+ else if (!strcmp(argv[1], "-R"))
+ diff_setup_opt |= DIFF_SETUP_REVERSE;
+ else if (!strncmp(argv[1], "-S", 2))
+ pickaxe = argv[1] + 2;
+ else if (!strncmp(argv[1], "-O", 2))
+ orderfile = argv[1] + 2;
+ else if (!strncmp(argv[1], "--diff-filter=", 14))
+ diff_filter = argv[1] + 14;
+ else if (!strcmp(argv[1], "--pickaxe-all"))
+ pickaxe_opts = DIFF_PICKAXE_ALL;
+ else if (!strncmp(argv[1], "-B", 2)) {
+ if ((diff_break_opt =
+ diff_scoreopt_parse(argv[1])) == -1)
+ usage(diff_files_usage);
+ }
+ else if (!strncmp(argv[1], "-M", 2)) {
+ if ((diff_score_opt =
+ diff_scoreopt_parse(argv[1])) == -1)
+ usage(diff_files_usage);
+ detect_rename = DIFF_DETECT_RENAME;
+ }
+ else if (!strncmp(argv[1], "-C", 2)) {
+ if ((diff_score_opt =
+ diff_scoreopt_parse(argv[1])) == -1)
+ usage(diff_files_usage);
+ detect_rename = DIFF_DETECT_COPY;
+ }
+ else if (!strcmp(argv[1], "--find-copies-harder"))
+ find_copies_harder = 1;
+ else
+ usage(diff_files_usage);
+ argv++; argc--;
+ }
+
+ /* Do we have a pathspec? */
+ pathspec = (argc > 1) ? argv + 1 : NULL;
+
+ /* --find-copies-harder is only accepted together with -C. */
+ if (find_copies_harder && detect_rename != DIFF_DETECT_COPY)
+ usage(diff_files_usage);
+
+ /* At this point, if argc == 1, then we are doing everything.
+ * Otherwise argv[1] .. argv[argc-1] have the explicit paths.
+ */
+ if (entries < 0) {
+ perror("read_cache");
+ exit(1);
+ }
+
+ diff_setup(diff_setup_opt);
+
+ for (i = 0; i < entries; i++) {
+ struct stat st;
+ unsigned int oldmode;
+ struct cache_entry *ce = active_cache[i];
+ int changed;
+
+ if (!ce_path_match(ce, pathspec))
+ continue;
+
+ /*
+ * An entry with a non-zero stage is reported once as
+ * unmerged; the following entries with the same name are
+ * skipped.
+ */
+ if (ce_stage(ce)) {
+ show_unmerge(ce->name);
+ while (i < entries &&
+ !strcmp(ce->name, active_cache[i]->name))
+ i++;
+ i--; /* compensate for loop control increments */
+ continue;
+ }
+
+ if (lstat(ce->name, &st) < 0) {
+ /* Errors other than "not there" are reported and skipped. */
+ if (errno != ENOENT && errno != ENOTDIR) {
+ perror(ce->name);
+ continue;
+ }
+ /* With -q a missing path is not reported at all. */
+ if (silent)
+ continue;
+ show_file('-', ce);
+ continue;
+ }
+ changed = ce_match_stat(ce, &st);
+ if (!changed && !find_copies_harder)
+ continue;
+ oldmode = ntohl(ce->ce_mode);
+ /*
+ * When the stat data differs the all-zero sha1 is passed as
+ * the new side; otherwise the cached sha1 is reused.
+ */
+ show_modified(oldmode, DIFF_FILE_CANON_MODE(st.st_mode),
+ ce->sha1, (changed ? null_sha1 : ce->sha1),
+ ce->name);
+ }
+ diffcore_std(pathspec,
+ detect_rename, diff_score_opt,
+ pickaxe, pickaxe_opts,
+ diff_break_opt,
+ orderfile, diff_filter);
+ diff_flush(diff_output_format, diff_line_termination);
+ return 0;
+ }
diff --git a/diff-helper.c b/diff-helper.c
new file mode 100644
index 0000000..07ccd7c
--- /dev/null
+++ b/diff-helper.c
@@ -0,0 +1,154 @@
+/*
+ * Copyright (C) 2005 Junio C Hamano
+ */
+#include "cache.h"
+#include "strbuf.h"
+#include "diff.h"
+
+ /* Option state set by the argument loop in main(). */
+ static const char *pickaxe = NULL; /* text following -S */
+ static int pickaxe_opts = 0; /* --pickaxe-all */
+ static const char *orderfile = NULL; /* text following -O */
+ static const char *diff_filter = NULL; /* text following --diff-filter= */
+ static int line_termination = '\n'; /* input record terminator; 0 with -z */
+ static int inter_name_termination = '\t'; /* separator before/between paths; 0 with -z */
+
+ /*
+ * Run diffcore_std_no_resolve() with av + 1 as its first argument and
+ * the option globals, then flush the result in patch format. ac is not
+ * used. The output is always flushed with '\n', regardless of -z.
+ */
+ static void flush_them(int ac, const char **av)
+ {
+ diffcore_std_no_resolve(av + 1,
+ pickaxe, pickaxe_opts,
+ orderfile, diff_filter);
+ diff_flush(DIFF_FORMAT_PATCH, '\n');
+ }
+
+ /*
+ * Usage text passed to usage() for an unrecognised option. Lists every
+ * option main() accepts, including --diff-filter=.
+ */
+ static const char *diff_helper_usage =
+ "git-diff-helper [-z] [-O<orderfile>] [-S<string>] [--pickaxe-all] [--diff-filter=<filter>] [<path>...]";
+
+ /*
+ * Copy len bytes of src into dst, a buffer of PATH_MAX bytes, and
+ * NUL-terminate it. Returns 0 on success, -1 when the result would not fit.
+ */
+ static int copy_path(char *dst, const char *src, size_t len)
+ {
+ 	if (PATH_MAX <= len)
+ 		return -1;
+ 	memcpy(dst, src, len);
+ 	dst[len] = 0;
+ 	return 0;
+ }
+
+ /*
+ * git-diff-helper entry point.
+ *
+ * Reads records from stdin. A record that starts with ':' is parsed as
+ *   :<old mode> <new mode> <old sha1> <new sha1> <status>[<score>]
+ * followed by inter_name_termination and one path (two for renamed or
+ * copied status). With -z each path is read as its own NUL-terminated
+ * record. Parsed records go to diff_helper_input(). Anything that does
+ * not parse flushes the queued diffs and is echoed to stdout unchanged.
+ *
+ * Paths longer than PATH_MAX - 1 bytes are treated as unparsable input
+ * rather than being copied into the fixed-size path buffers.
+ */
+ int main(int ac, const char **av) {
+ 	struct strbuf sb;
+ 	const char *garbage_flush_format;
+
+ 	strbuf_init(&sb);
+
+ 	/* Note: -z and -S are recognised by their second character only. */
+ 	while (1 < ac && av[1][0] == '-') {
+ 		if (av[1][1] == 'z')
+ 			line_termination = inter_name_termination = 0;
+ 		else if (av[1][1] == 'S') {
+ 			pickaxe = av[1] + 2;
+ 		}
+ 		else if (!strcmp(av[1], "--pickaxe-all"))
+ 			pickaxe_opts = DIFF_PICKAXE_ALL;
+ 		else if (!strncmp(av[1], "--diff-filter=", 14))
+ 			diff_filter = av[1] + 14;
+ 		else if (!strncmp(av[1], "-O", 2))
+ 			orderfile = av[1] + 2;
+ 		else
+ 			usage(diff_helper_usage);
+ 		ac--; av++;
+ 	}
+ 	garbage_flush_format = (line_termination == 0) ? "%s" : "%s\n";
+
+ 	/* the remaining parameters are paths patterns */
+
+ 	diff_setup(0);
+ 	while (1) {
+ 		unsigned old_mode, new_mode;
+ 		unsigned char old_sha1[20], new_sha1[20];
+ 		char old_path[PATH_MAX];
+ 		int status, score, two_paths;
+ 		char new_path[PATH_MAX];
+
+ 		int ch;
+ 		char *cp, *ep;
+
+ 		read_line(&sb, stdin, line_termination);
+ 		if (sb.eof)
+ 			break;
+ 		/*
+ 		 * Every "break" inside this switch leaves the switch only
+ 		 * and falls through to the garbage handling below.
+ 		 */
+ 		switch (sb.buf[0]) {
+ 		case ':':
+ 			/* parse the first part up to the status */
+ 			cp = sb.buf + 1;
+ 			old_mode = new_mode = 0;
+ 			while ((ch = *cp) && ('0' <= ch && ch <= '7')) {
+ 				old_mode = (old_mode << 3) | (ch - '0');
+ 				cp++;
+ 			}
+ 			if (*cp++ != ' ')
+ 				break;
+ 			while ((ch = *cp) && ('0' <= ch && ch <= '7')) {
+ 				new_mode = (new_mode << 3) | (ch - '0');
+ 				cp++;
+ 			}
+ 			if (*cp++ != ' ')
+ 				break;
+ 			if (get_sha1_hex(cp, old_sha1))
+ 				break;
+ 			cp += 40;
+ 			if (*cp++ != ' ')
+ 				break;
+ 			if (get_sha1_hex(cp, new_sha1))
+ 				break;
+ 			cp += 40;
+ 			if (*cp++ != ' ')
+ 				break;
+ 			status = *cp++;
+ 			/*
+ 			 * strchr() also matches the terminating NUL of its
+ 			 * first argument, so a truncated record must be
+ 			 * rejected explicitly.
+ 			 */
+ 			if (!status || !strchr("MCRNDU", status))
+ 				break;
+ 			two_paths = score = 0;
+ 			if (status == DIFF_STATUS_RENAMED ||
+ 			    status == DIFF_STATUS_COPIED)
+ 				two_paths = 1;
+
+ 			/* pick up score if exists */
+ 			if (sscanf(cp, "%d", &score) != 1)
+ 				score = 0;
+ 			cp = strchr(cp,
+ 				    inter_name_termination);
+ 			if (!cp)
+ 				break;
+ 			if (*cp++ != inter_name_termination)
+ 				break;
+
+ 			/* first pathname */
+ 			if (!line_termination) {
+ 				read_line(&sb, stdin, line_termination);
+ 				if (sb.eof)
+ 					break;
+ 				if (copy_path(old_path, sb.buf, strlen(sb.buf)))
+ 					break;
+ 			}
+ 			else if (!two_paths) {
+ 				if (copy_path(old_path, cp, strlen(cp)))
+ 					break;
+ 			}
+ 			else {
+ 				ep = strchr(cp, inter_name_termination);
+ 				if (!ep)
+ 					break;
+ 				if (copy_path(old_path, cp, ep - cp))
+ 					break;
+ 				cp = ep + 1;
+ 			}
+
+ 			/* second pathname */
+ 			if (!two_paths)
+ 				strcpy(new_path, old_path);
+ 			else {
+ 				if (!line_termination) {
+ 					read_line(&sb, stdin,
+ 						  line_termination);
+ 					if (sb.eof)
+ 						break;
+ 					if (copy_path(new_path, sb.buf,
+ 						      strlen(sb.buf)))
+ 						break;
+ 				}
+ 				else if (copy_path(new_path, cp, strlen(cp)))
+ 					break;
+ 			}
+ 			diff_helper_input(old_mode, new_mode,
+ 					  old_sha1, new_sha1,
+ 					  old_path, status, score,
+ 					  new_path);
+ 			continue;
+ 		}
+ 		/* Not a parsable record: flush what is queued, echo the input. */
+ 		flush_them(ac, av);
+ 		printf(garbage_flush_format, sb.buf);
+ 	}
+ 	flush_them(ac, av);
+ 	return 0;
+ }
diff --git a/diff-stages.c b/diff-stages.c
new file mode 100644
index 0000000..cbaba55
--- /dev/null
+++ b/diff-stages.c
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) 2005 Junio C Hamano
+ */
+
+#include "cache.h"
+#include "diff.h"
+
+ /* Option state; each variable is set in main() by the flag noted beside it. */
+ static int diff_output_format = DIFF_FORMAT_RAW; /* -p, -u, --name-only */
+ static int diff_line_termination = '\n'; /* -z sets this to 0 */
+ static int detect_rename = 0; /* -M, -C */
+ static int find_copies_harder = 0; /* --find-copies-harder */
+ static int diff_setup_opt = 0; /* -R */
+ static int diff_score_opt = 0; /* value parsed from -M or -C */
+ static const char *pickaxe = NULL; /* text following -S */
+ static int pickaxe_opts = 0; /* --pickaxe-all */
+ static int diff_break_opt = -1; /* value parsed from -B */
+ static const char *orderfile = NULL; /* text following -O */
+ static const char *diff_filter = NULL; /* text following --diff-filter= */
+
+ /* Usage text handed to usage() when main() rejects the command line. */
+ static char *diff_stages_usage =
+ "git-diff-stages [<common diff options>] <stage1> <stage2> [<path>...]"
+ COMMON_DIFF_OPTIONS_HELP;
+
+ /*
+ * Walk the active cache, grouping consecutive entries that share a
+ * name, and compare the entry found at stage1 with the one at stage2
+ * for each name. A name present only at stage2 is queued as '+', one
+ * present only at stage1 as '-'. When both exist the pair is queued as
+ * a change if sha1 or mode differ, or always with find_copies_harder.
+ */
+ static void diff_stages(int stage1, int stage2)
+ {
+ 	int pos = 0;
+
+ 	while (pos < active_nr) {
+ 		struct cache_entry *by_stage[4] = { NULL, };
+ 		struct cache_entry *cur = active_cache[pos];
+ 		struct cache_entry *one, *two;
+ 		const char *name = cur->name;
+ 		int namelen = ce_namelen(cur);
+
+ 		/* Collect the run of entries carrying this name, by stage. */
+ 		do {
+ 			by_stage[ce_stage(cur)] = cur;
+ 			pos++;
+ 			if (active_nr <= pos)
+ 				break;
+ 			cur = active_cache[pos];
+ 		} while (ce_namelen(cur) == namelen &&
+ 			 !memcmp(name, cur->name, namelen));
+
+ 		one = by_stage[stage1];
+ 		two = by_stage[stage2];
+
+ 		if (one && two) {
+ 			if (memcmp(one->sha1, two->sha1, 20) ||
+ 			    (one->ce_mode != two->ce_mode) ||
+ 			    find_copies_harder)
+ 				diff_change(ntohl(one->ce_mode),
+ 					    ntohl(two->ce_mode),
+ 					    one->sha1, two->sha1, name, NULL);
+ 		}
+ 		else if (two)
+ 			diff_addremove('+', ntohl(two->ce_mode),
+ 				       two->sha1, name, NULL);
+ 		else if (one)
+ 			diff_addremove('-', ntohl(one->ce_mode),
+ 				       one->sha1, name, NULL);
+ 	}
+ }
+
+ /*
+ * git-diff-stages entry point. Parses the options, then requires two
+ * stage numbers in the range 0..3; any further arguments are handed to
+ * diffcore_std() as path restriction. Returns 0.
+ */
+ int main(int ac, const char **av)
+ {
+ int stage1, stage2;
+
+ /* NOTE(review): the return value of read_cache() is not checked here. */
+ read_cache();
+ while (1 < ac && av[1][0] == '-') {
+ const char *arg = av[1];
+ if (!strcmp(arg, "-r"))
+ ; /* as usual */
+ else if (!strcmp(arg, "-p") || !strcmp(arg, "-u"))
+ diff_output_format = DIFF_FORMAT_PATCH;
+ else if (!strncmp(arg, "-B", 2)) {
+ if ((diff_break_opt = diff_scoreopt_parse(arg)) == -1)
+ usage(diff_stages_usage);
+ }
+ else if (!strncmp(arg, "-M", 2)) {
+ detect_rename = DIFF_DETECT_RENAME;
+ if ((diff_score_opt = diff_scoreopt_parse(arg)) == -1)
+ usage(diff_stages_usage);
+ }
+ else if (!strncmp(arg, "-C", 2)) {
+ detect_rename = DIFF_DETECT_COPY;
+ if ((diff_score_opt = diff_scoreopt_parse(arg)) == -1)
+ usage(diff_stages_usage);
+ }
+ else if (!strcmp(arg, "--find-copies-harder"))
+ find_copies_harder = 1;
+ else if (!strcmp(arg, "-z"))
+ diff_line_termination = 0;
+ else if (!strcmp(arg, "--name-only"))
+ diff_output_format = DIFF_FORMAT_NAME;
+ else if (!strcmp(arg, "-R"))
+ diff_setup_opt |= DIFF_SETUP_REVERSE;
+ else if (!strncmp(arg, "-S", 2))
+ pickaxe = arg + 2;
+ else if (!strncmp(arg, "-O", 2))
+ orderfile = arg + 2;
+ else if (!strncmp(arg, "--diff-filter=", 14))
+ diff_filter = arg + 14;
+ else if (!strcmp(arg, "--pickaxe-all"))
+ pickaxe_opts = DIFF_PICKAXE_ALL;
+ else
+ usage(diff_stages_usage);
+ ac--; av++;
+ }
+
+ /*
+ * Two stage numbers are mandatory, and --find-copies-harder is
+ * only accepted together with -C.
+ */
+ if (ac < 3 ||
+ sscanf(av[1], "%d", &stage1) != 1 ||
+ ! (0 <= stage1 && stage1 <= 3) ||
+ sscanf(av[2], "%d", &stage2) != 1 ||
+ ! (0 <= stage2 && stage2 <= 3) ||
+ (find_copies_harder && detect_rename != DIFF_DETECT_COPY))
+ usage(diff_stages_usage);
+
+ av += 3; /* The rest from av[0] are for paths restriction. */
+ diff_setup(diff_setup_opt);
+
+ diff_stages(stage1, stage2);
+
+ diffcore_std(av,
+ detect_rename, diff_score_opt,
+ pickaxe, pickaxe_opts,
+ diff_break_opt,
+ orderfile,
+ diff_filter);
+ diff_flush(diff_output_format, diff_line_termination);
+ return 0;
+ }
diff --git a/diff-tree.c b/diff-tree.c
new file mode 100644
index 0000000..5b9467d
--- /dev/null
+++ b/diff-tree.c
@@ -0,0 +1,559 @@
+#include <ctype.h>
+#include "cache.h"
+#include "diff.h"
+#include "commit.h"
+
+ /* Option state; each variable is set in main() by the flag noted beside it. */
+ static int show_root_diff = 0; /* --root */
+ static int verbose_header = 0; /* -v, --pretty */
+ static int ignore_merges = 1; /* cleared by -m */
+ static int recursive = 0; /* -r, -t, -p, -u */
+ static int show_tree_entry_in_recursive = 0; /* -t */
+ static int read_stdin = 0; /* --stdin */
+ static int diff_output_format = DIFF_FORMAT_RAW; /* -p, -u, --name-only, -s */
+ static int diff_line_termination = '\n'; /* -z sets this to 0 */
+ static int detect_rename = 0; /* -M, -C */
+ static int find_copies_harder = 0; /* --find-copies-harder */
+ static int diff_setup_opt = 0; /* -R; cache flags are added for --stdin with rename detection */
+ static int diff_score_opt = 0; /* value parsed from -M or -C */
+ static const char *pickaxe = NULL; /* text following -S */
+ static int pickaxe_opts = 0; /* --pickaxe-all */
+ static int diff_break_opt = -1; /* value parsed from -B */
+ static const char *orderfile = NULL; /* text following -O */
+ static const char *diff_filter = NULL; /* text following --diff-filter= */
+ /* Pending header line; printed and reset to NULL by call_diff_flush(). */
+ static const char *header = NULL;
+ static const char *header_prefix = "";
+ static enum cmit_fmt commit_format = CMIT_FMT_RAW; /* --pretty */
+
+ // What paths are we interested in?
+ static int nr_paths = 0;
+ static const char **paths = NULL;
+ static int *pathlens = NULL; /* strlen() of each entry of paths */
+
+ static int diff_tree_sha1(const unsigned char *old, const unsigned char *new, const char *base);
+
+ /*
+ * Advance *bufp / *sizep past the tree entry at the start of the
+ * buffer. An entry is a NUL-terminated string followed by 20 bytes.
+ * Dies with "corrupt tree file" when the remaining buffer has no NUL
+ * or is too short to hold the whole entry.
+ */
+ static void update_tree_entry(void **bufp, unsigned long *sizep)
+ {
+ 	/* char pointer: arithmetic on void * is a GNU extension, not ISO C */
+ 	char *buf = *bufp;
+ 	unsigned long size = *sizep;
+ 	/* bounded search so a corrupt entry cannot be read past the buffer */
+ 	const char *nul = memchr(buf, '\0', size);
+ 	unsigned long len;
+
+ 	if (!nul)
+ 		die("corrupt tree file");
+ 	len = (unsigned long)(nul - buf) + 1 + 20;
+ 	if (size < len)
+ 		die("corrupt tree file");
+ 	*bufp = buf + len;
+ 	*sizep = size - len;
+ }
+
+ /*
+ * Decode the tree entry at the start of tree (size bytes remain).
+ * The entry is parsed as an octal mode, a space, a NUL-terminated path
+ * and 20 bytes of sha1. Stores the path in *pathp and the mode, passed
+ * through DIFF_FILE_CANON_MODE(), in *modep. Returns a pointer to the
+ * sha1 bytes. Dies with "corrupt tree file" on a malformed or
+ * truncated entry.
+ */
+ static const unsigned char *extract(void *tree, unsigned long size, const char **pathp, unsigned int *modep)
+ {
+ 	const char *entry = tree;
+ 	/* bounded search so a corrupt entry cannot be read past the buffer */
+ 	const char *nul = memchr(entry, '\0', size);
+ 	const char *path;
+ 	unsigned long len;
+ 	unsigned int mode;
+
+ 	if (!nul)
+ 		die("corrupt tree file");
+ 	len = (unsigned long)(nul - entry) + 1;
+ 	path = strchr(entry, ' ');
+ 	if (!path || size < len + 20 || sscanf(entry, "%o", &mode) != 1)
+ 		die("corrupt tree file");
+ 	*pathp = path+1;
+ 	*modep = DIFF_FILE_CANON_MODE(mode);
+ 	/* cast instead of GNU-only arithmetic on void * */
+ 	return (const unsigned char *)entry + len;
+ }
+
+ /*
+ * Return a newly allocated string made of base, the first pathlen
+ * bytes of path, and a trailing "/". The caller frees the result.
+ */
+ static char *malloc_base(const char *base, const char *path, int pathlen)
+ {
+ 	int base_len = strlen(base);
+ 	char *joined = xmalloc(base_len + pathlen + 2);
+ 	char *cursor = joined;
+
+ 	memcpy(cursor, base, base_len);
+ 	cursor += base_len;
+ 	memcpy(cursor, path, pathlen);
+ 	cursor += pathlen;
+ 	*cursor++ = '/';
+ 	*cursor = '\0';
+ 	return joined;
+ }
+
+static void show_file(const char *prefix, void *tree, unsigned long size, const char *base);
+static void show_tree(const char *prefix, void *tree, unsigned long size, const char *base);
+
+ /*
+ * A file entry went away or appeared.
+ *
+ * prefix[0] is passed to diff_addremove() as its first argument
+ * (callers pass "-" or "+"). In recursive mode a directory entry is
+ * not queued itself: its tree object is read and every interesting
+ * entry below it is shown through show_tree() instead.
+ */
+ static void show_file(const char *prefix, void *tree, unsigned long size, const char *base)
+ {
+ unsigned mode;
+ const char *path;
+ const unsigned char *sha1 = extract(tree, size, &path, &mode);
+
+ if (recursive && S_ISDIR(mode)) {
+ char type[20];
+ /* size and tree below shadow the parameters of the same name */
+ unsigned long size;
+ char *newbase = malloc_base(base, path, strlen(path));
+ void *tree;
+
+ tree = read_sha1_file(sha1, type, &size);
+ if (!tree || strcmp(type, "tree"))
+ die("corrupt tree sha %s", sha1_to_hex(sha1));
+
+ show_tree(prefix, tree, size, newbase);
+
+ free(tree);
+ free(newbase);
+ return;
+ }
+
+ diff_addremove(prefix[0], mode, sha1, base, path);
+ }
+
+ /*
+ * Compare the first entry of tree1 with the first entry of tree2.
+ *
+ * Returns -1 after showing the tree1 entry as removed (it sorts before
+ * the tree2 entry), 1 after showing the tree2 entry as added, and
+ * otherwise the result of handling two entries with the same name:
+ * 0, or the value returned by diff_tree_sha1() when recursing into a
+ * directory. diff_tree() uses the result to decide which side to
+ * advance.
+ */
+ static int compare_tree_entry(void *tree1, unsigned long size1, void *tree2, unsigned long size2, const char *base)
+ {
+ unsigned mode1, mode2;
+ const char *path1, *path2;
+ const unsigned char *sha1, *sha2;
+ int cmp, pathlen1, pathlen2;
+
+ sha1 = extract(tree1, size1, &path1, &mode1);
+ sha2 = extract(tree2, size2, &path2, &mode2);
+
+ pathlen1 = strlen(path1);
+ pathlen2 = strlen(path2);
+ cmp = base_name_compare(path1, pathlen1, mode1, path2, pathlen2, mode2);
+ if (cmp < 0) {
+ show_file("-", tree1, size1, base);
+ return -1;
+ }
+ if (cmp > 0) {
+ show_file("+", tree2, size2, base);
+ return 1;
+ }
+ /* Identical entries are skipped unless --find-copies-harder is on. */
+ if (!find_copies_harder && !memcmp(sha1, sha2, 20) && mode1 == mode2)
+ return 0;
+
+ /*
+ * If the filemode has changed to/from a directory from/to a regular
+ * file, we need to consider it a remove and an add.
+ */
+ if (S_ISDIR(mode1) != S_ISDIR(mode2)) {
+ show_file("-", tree1, size1, base);
+ show_file("+", tree2, size2, base);
+ return 0;
+ }
+
+ /* Both sides are directories: descend, optionally queueing the tree itself (-t). */
+ if (recursive && S_ISDIR(mode1)) {
+ int retval;
+ char *newbase = malloc_base(base, path1, pathlen1);
+ if (show_tree_entry_in_recursive)
+ diff_change(mode1, mode2, sha1, sha2, base, path1);
+ retval = diff_tree_sha1(sha1, sha2, newbase);
+ free(newbase);
+ return retval;
+ }
+
+ diff_change(mode1, mode2, sha1, sha2, base, path1);
+ return 0;
+ }
+
+ /*
+ * Decide whether the first entry of tree, found under directory
+ * prefix base, is covered by the path limits in paths[].
+ * Returns 1 when there are no path limits or when some limit matches,
+ * 0 otherwise.
+ */
+ static int interesting(void *tree, unsigned long size, const char *base)
+ {
+ const char *path;
+ unsigned mode;
+ int i;
+ int baselen, pathlen;
+
+ if (!nr_paths)
+ return 1;
+
+ (void)extract(tree, size, &path, &mode);
+
+ pathlen = strlen(path);
+ baselen = strlen(base);
+
+ for (i=0; i < nr_paths; i++) {
+ const char *match = paths[i];
+ int matchlen = pathlens[i];
+
+ if (baselen >= matchlen) {
+ /* If it doesn't match, move along... */
+ if (strncmp(base, match, matchlen))
+ continue;
+
+ /* The base is a subdirectory of a path which was specified. */
+ return 1;
+ }
+
+ /* Does the base match? */
+ if (strncmp(base, match, baselen))
+ continue;
+
+ /* From here on compare only the part of the limit after base. */
+ match += baselen;
+ matchlen -= baselen;
+
+ if (pathlen > matchlen)
+ continue;
+
+ /*
+ * The limit is longer than this entry name: the entry can only
+ * match as a leading directory, so the limit must continue
+ * with '/' and the entry must be a directory.
+ */
+ if (matchlen > pathlen) {
+ if (match[pathlen] != '/')
+ continue;
+ if (!S_ISDIR(mode))
+ continue;
+ }
+
+ if (strncmp(path, match, pathlen))
+ continue;
+
+ return 1;
+ }
+ return 0; /* No matches */
+ }
+
+ /*
+ * A whole sub-tree went away or appeared: show every interesting
+ * entry of the tree buffer with the given prefix.
+ */
+ static void show_tree(const char *prefix, void *tree, unsigned long size, const char *base)
+ {
+ 	for (; size; update_tree_entry(&tree, &size)) {
+ 		if (!interesting(tree, size, base))
+ 			continue;
+ 		show_file(prefix, tree, size, base);
+ 	}
+ }
+
+ /*
+ * Walk two tree buffers in parallel and queue the differences.
+ * Entries that fail interesting() are skipped. Once one side is
+ * exhausted the rest of the other side is shown as added ("+") or
+ * removed ("-"). Always returns 0.
+ */
+ static int diff_tree(void *tree1, unsigned long size1, void *tree2, unsigned long size2, const char *base)
+ {
+ while (size1 | size2) {
+ if (nr_paths && size1 && !interesting(tree1, size1, base)) {
+ update_tree_entry(&tree1, &size1);
+ continue;
+ }
+ if (nr_paths && size2 && !interesting(tree2, size2, base)) {
+ update_tree_entry(&tree2, &size2);
+ continue;
+ }
+ if (!size1) {
+ show_file("+", tree2, size2, base);
+ update_tree_entry(&tree2, &size2);
+ continue;
+ }
+ if (!size2) {
+ show_file("-", tree1, size1, base);
+ update_tree_entry(&tree1, &size1);
+ continue;
+ }
+ /* -1: advance tree1 only; 1: advance tree2 only; 0: advance both. */
+ switch (compare_tree_entry(tree1, size1, tree2, size2, base)) {
+ case -1:
+ update_tree_entry(&tree1, &size1);
+ continue;
+ case 0:
+ update_tree_entry(&tree1, &size1);
+ /* Fallthrough */
+ case 1:
+ update_tree_entry(&tree2, &size2);
+ continue;
+ }
+ /* Reached only when compare_tree_entry() returns another value. */
+ die("git-diff-tree: internal error");
+ }
+ return 0;
+ }
+
+ /*
+ * Read the trees referenced by old and new and diff them with
+ * diff_tree() under the path prefix base. Dies when either tree
+ * cannot be read. Returns the result of diff_tree().
+ */
+ static int diff_tree_sha1(const unsigned char *old, const unsigned char *new, const char *base)
+ {
+ 	unsigned long old_size, new_size;
+ 	void *old_tree, *new_tree;
+ 	int status;
+
+ 	old_tree = read_object_with_reference(old, "tree", &old_size, NULL);
+ 	if (!old_tree)
+ 		die("unable to read source tree (%s)", sha1_to_hex(old));
+
+ 	new_tree = read_object_with_reference(new, "tree", &new_size, NULL);
+ 	if (!new_tree)
+ 		die("unable to read destination tree (%s)", sha1_to_hex(new));
+
+ 	status = diff_tree(old_tree, old_size, new_tree, new_size, base);
+
+ 	free(old_tree);
+ 	free(new_tree);
+ 	return status;
+ }
+
+ /* Call diff_setup() with the flags collected in diff_setup_opt. */
+ static void call_diff_setup(void)
+ {
+ diff_setup(diff_setup_opt);
+ }
+
+ /*
+ * Run diffcore_std() with the option globals and flush the queue.
+ * When the queue is empty nothing is printed and 0 is returned.
+ * Otherwise the pending header, if any, is printed once and cleared,
+ * the queue is flushed in the selected format, and 1 is returned.
+ */
+ static int call_diff_flush(void)
+ {
+ 	int have_output;
+
+ 	diffcore_std(NULL,
+ 		     detect_rename, diff_score_opt,
+ 		     pickaxe, pickaxe_opts,
+ 		     diff_break_opt,
+ 		     orderfile,
+ 		     diff_filter);
+
+ 	have_output = !diff_queue_is_empty();
+ 	if (have_output) {
+ 		if (header) {
+ 			printf("%s%c", header, diff_line_termination);
+ 			header = NULL;
+ 		}
+ 		diff_flush(diff_output_format, diff_line_termination);
+ 	}
+ 	else
+ 		diff_flush(DIFF_FORMAT_NO_OUTPUT, diff_line_termination);
+ 	return have_output;
+ }
+
+ /*
+ * Top-level comparison of two trees: set up the diff machinery, run
+ * diff_tree_sha1() and flush the output. Returns the value of
+ * diff_tree_sha1(); the result of call_diff_flush() is ignored.
+ */
+ static int diff_tree_sha1_top(const unsigned char *old,
+ const unsigned char *new, const char *base)
+ {
+ int ret;
+
+ call_diff_setup();
+ ret = diff_tree_sha1(old, new, base);
+ call_diff_flush();
+ return ret;
+ }
+
+ /*
+ * Diff the tree referenced by new against an empty tree, so that
+ * every entry is shown as added. Dies when the tree cannot be read.
+ * Returns the value of diff_tree().
+ */
+ static int diff_root_tree(const unsigned char *new, const char *base)
+ {
+ int retval;
+ void *tree;
+ unsigned long size;
+
+ call_diff_setup();
+ tree = read_object_with_reference(new, "tree", &size, NULL);
+ if (!tree)
+ die("unable to read root tree (%s)", sha1_to_hex(new));
+ /* "" with size 0 stands in for the empty old side */
+ retval = diff_tree("", 0, tree, size, base);
+ free(tree);
+ call_diff_flush();
+ return retval;
+ }
+
+ /*
+ * Build the header shown before the diff of one commit.
+ * Without verbose_header the commit name itself is returned.
+ * Otherwise the result is "<prefix><commit> (from <parent>)\n" followed
+ * by whatever pretty_print_commit() writes for msg, held in a static
+ * buffer that the next call overwrites.
+ */
+ static const char *generate_header(const char *commit, const char *parent, const char *msg, unsigned long len)
+ {
+ 	static char this_header[16384];
+ 	int used;
+
+ 	if (!verbose_header)
+ 		return commit;
+
+ 	used = sprintf(this_header, "%s%s (from %s)\n", header_prefix, commit, parent);
+ 	pretty_print_commit(commit_format, msg, len,
+ 			    this_header + used, sizeof(this_header) - used);
+ 	return this_header;
+ }
+
+ /*
+ * Show the diff of one commit against each of its parents.
+ *
+ * commit is the sha1 of the commit; name is the label used in the
+ * header, or NULL to use the hex form of commit.
+ *
+ * The commit buffer is inspected at fixed offsets: a "parent " line is
+ * expected at byte 46 (presumably just after the "tree <40-hex>\n"
+ * line) and each parent line is 48 bytes long. Every fixed-offset
+ * access is checked against the object size first.
+ *
+ * Returns 0 on success, -1 when the commit cannot be read or a parent
+ * line does not hold a valid sha1. The commit buffer is released on
+ * every path once it has been read.
+ */
+ static int diff_tree_commit(const unsigned char *commit, const char *name)
+ {
+ 	unsigned long size, offset;
+ 	char *buf = read_object_with_reference(commit, "commit", &size, NULL);
+ 	int ret = 0;
+
+ 	if (!buf)
+ 		return -1;
+
+ 	if (!name) {
+ 		static char commit_name[60];
+ 		strcpy(commit_name, sha1_to_hex(commit));
+ 		name = commit_name;
+ 	}
+
+ 	/* Root commit? (too short to hold a parent line counts as root) */
+ 	if (show_root_diff &&
+ 	    (size < 46 + 7 || memcmp(buf + 46, "parent ", 7))) {
+ 		header = generate_header(name, "root", buf, size);
+ 		diff_root_tree(commit, "");
+ 	}
+
+ 	/* More than one parent? */
+ 	if (ignore_merges &&
+ 	    46 + 48 + 7 <= size &&
+ 	    !memcmp(buf + 46 + 48, "parent ", 7))
+ 		goto out;
+
+ 	offset = 46;
+ 	while (offset + 48 < size && !memcmp(buf + offset, "parent ", 7)) {
+ 		unsigned char parent[20];
+ 		if (get_sha1_hex(buf + offset + 7, parent)) {
+ 			ret = -1;
+ 			goto out;
+ 		}
+ 		header = generate_header(name, sha1_to_hex(parent), buf, size);
+ 		diff_tree_sha1_top(parent, commit, "");
+ 		if (!header && verbose_header) {
+ 			header_prefix = "\ndiff-tree ";
+ 			/*
+ 			 * Don't print multiple merge entries if we
+ 			 * don't print the diffs.
+ 			 */
+ 		}
+ 		offset += 48;
+ 	}
+ out:
+ 	/*
+ 	 * generate_header() returns either name or its own static
+ 	 * buffer, so nothing visible here still points into buf.
+ 	 */
+ 	free(buf);
+ 	return ret;
+ }
+
+ /*
+ * Handle one line read from stdin in --stdin mode.
+ * The line must end in '\n' and start with a commit sha1 in hex.
+ * If a whitespace character and a second hex sha1 follow, the two
+ * objects are compared directly, the second one being used as the old
+ * side; otherwise the commit is diffed against its parents via
+ * diff_tree_commit(). Returns -1 on a malformed line. line is modified
+ * in place.
+ */
+ static int diff_tree_stdin(char *line)
+ {
+ int len = strlen(line);
+ unsigned char commit[20], parent[20];
+ static char this_header[1000];
+
+ if (!len || line[len-1] != '\n')
+ return -1;
+ line[len-1] = 0;
+ if (get_sha1_hex(line, commit))
+ return -1;
+ if (isspace(line[40]) && !get_sha1_hex(line+41, parent)) {
+ /* cut the line into two separate 40-character hex strings */
+ line[40] = 0;
+ line[81] = 0;
+ sprintf(this_header, "%s (from %s)\n", line, line+41);
+ header = this_header;
+ return diff_tree_sha1_top(parent, commit, "");
+ }
+ line[40] = 0;
+ return diff_tree_commit(commit, line);
+ }
+
+ /*
+ * Usage text handed to usage() when main() rejects the command line.
+ * NOTE(review): main() also accepts -r and --root, which are not
+ * spelled out here; they may be covered by COMMON_DIFF_OPTIONS_HELP.
+ */
+ static char *diff_tree_usage =
+ "git-diff-tree [--stdin] [-m] [-s] [-v] [--pretty] [-t] "
+ "[<common diff options>] <tree-ish> <tree-ish>"
+ COMMON_DIFF_OPTIONS_HELP;
+
+ /*
+ * git-diff-tree entry point.
+ *
+ * Arguments are scanned in order: up to two leading non-option
+ * arguments that get_sha1() resolves are taken as objects, options may
+ * appear anywhere before the first other non-option argument or "--",
+ * and everything after that becomes the path limit list.
+ * With one object the commit is diffed against its parents, with two
+ * objects they are compared directly. With --stdin, lines are then
+ * read from stdin and handled by diff_tree_stdin(). Returns 0.
+ */
+ int main(int argc, const char **argv)
+ {
+ int nr_sha1;
+ char line[1000];
+ unsigned char sha1[2][20];
+
+ nr_sha1 = 0;
+ for (;;) {
+ const char *arg;
+
+ argv++;
+ argc--;
+ arg = *argv;
+ if (!arg)
+ break;
+
+ if (*arg != '-') {
+ if (nr_sha1 < 2 && !get_sha1(arg, sha1[nr_sha1])) {
+ nr_sha1++;
+ continue;
+ }
+ /* first path argument: argv/argc now describe the path list */
+ break;
+ }
+
+ if (!strcmp(arg, "--")) {
+ argv++;
+ argc--;
+ break;
+ }
+ if (!strcmp(arg, "-r")) {
+ recursive = 1;
+ continue;
+ }
+ if (!strcmp(arg, "-t")) {
+ recursive = show_tree_entry_in_recursive = 1;
+ continue;
+ }
+ if (!strcmp(arg, "-R")) {
+ diff_setup_opt |= DIFF_SETUP_REVERSE;
+ continue;
+ }
+ if (!strcmp(arg, "-p") || !strcmp(arg, "-u")) {
+ diff_output_format = DIFF_FORMAT_PATCH;
+ recursive = 1;
+ continue;
+ }
+ if (!strncmp(arg, "-S", 2)) {
+ pickaxe = arg + 2;
+ continue;
+ }
+ if (!strncmp(arg, "-O", 2)) {
+ orderfile = arg + 2;
+ continue;
+ }
+ if (!strncmp(arg, "--diff-filter=", 14)) {
+ diff_filter = arg + 14;
+ continue;
+ }
+ if (!strcmp(arg, "--pickaxe-all")) {
+ pickaxe_opts = DIFF_PICKAXE_ALL;
+ continue;
+ }
+ if (!strncmp(arg, "-M", 2)) {
+ detect_rename = DIFF_DETECT_RENAME;
+ if ((diff_score_opt = diff_scoreopt_parse(arg)) == -1)
+ usage(diff_tree_usage);
+ continue;
+ }
+ if (!strncmp(arg, "-C", 2)) {
+ detect_rename = DIFF_DETECT_COPY;
+ if ((diff_score_opt = diff_scoreopt_parse(arg)) == -1)
+ usage(diff_tree_usage);
+ continue;
+ }
+ if (!strncmp(arg, "-B", 2)) {
+ if ((diff_break_opt = diff_scoreopt_parse(arg)) == -1)
+ usage(diff_tree_usage);
+ continue;
+ }
+ if (!strcmp(arg, "--find-copies-harder")) {
+ find_copies_harder = 1;
+ continue;
+ }
+ if (!strcmp(arg, "--name-only")) {
+ diff_output_format = DIFF_FORMAT_NAME;
+ continue;
+ }
+ if (!strcmp(arg, "-z")) {
+ diff_line_termination = 0;
+ continue;
+ }
+ if (!strcmp(arg, "-m")) {
+ ignore_merges = 0;
+ continue;
+ }
+ if (!strcmp(arg, "-s")) {
+ diff_output_format = DIFF_FORMAT_NO_OUTPUT;
+ continue;
+ }
+ if (!strcmp(arg, "-v")) {
+ verbose_header = 1;
+ header_prefix = "diff-tree ";
+ continue;
+ }
+ /* anything after "--pretty" is passed to get_commit_format() */
+ if (!strncmp(arg, "--pretty", 8)) {
+ verbose_header = 1;
+ header_prefix = "diff-tree ";
+ commit_format = get_commit_format(arg+8);
+ continue;
+ }
+ if (!strcmp(arg, "--stdin")) {
+ read_stdin = 1;
+ continue;
+ }
+ if (!strcmp(arg, "--root")) {
+ show_root_diff = 1;
+ continue;
+ }
+ usage(diff_tree_usage);
+ }
+ /* --find-copies-harder is only accepted together with -C. */
+ if (find_copies_harder && detect_rename != DIFF_DETECT_COPY)
+ usage(diff_tree_usage);
+
+ /* Remaining arguments are path limits; cache their lengths. */
+ if (argc > 0) {
+ int i;
+
+ paths = argv;
+ nr_paths = argc;
+ pathlens = xmalloc(nr_paths * sizeof(int));
+ for (i=0; i<nr_paths; i++)
+ pathlens[i] = strlen(paths[i]);
+ }
+
+ switch (nr_sha1) {
+ case 0:
+ if (!read_stdin)
+ usage(diff_tree_usage);
+ break;
+ case 1:
+ diff_tree_commit(sha1[0], NULL);
+ break;
+ case 2:
+ diff_tree_sha1_top(sha1[0], sha1[1], "");
+ break;
+ }
+
+ if (!read_stdin)
+ return 0;
+
+ if (detect_rename)
+ diff_setup_opt |= (DIFF_SETUP_USE_SIZE_CACHE |
+ DIFF_SETUP_USE_CACHE);
+ /* results of diff_tree_stdin() are ignored; malformed lines are skipped */
+ while (fgets(line, sizeof(line), stdin))
+ diff_tree_stdin(line);
+
+ return 0;
+ }
diff --git a/diff.c b/diff.c
new file mode 100644
index 0000000..4a4b621
--- /dev/null
+++ b/diff.c
@@ -0,0 +1,1201 @@
+/*
+ * Copyright (C) 2005 Junio C Hamano
+ */
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <signal.h>
+#include "cache.h"
+#include "quote.h"
+#include "diff.h"
+#include "diffcore.h"
+
+ /* Options given to the diff command; GIT_DIFF_OPTS replaces them in external_diff(). */
+ static const char *diff_opts = "-pu";
+ /* All-zero sha1; fill_filespec() treats a sha1 equal to it as not valid. */
+ static unsigned char null_sha1[20] = { 0, };
+
+ static int reverse_diff;
+ /* When zero, diff_populate_filespec() ignores its size_only argument. */
+ static int use_size_cache;
+
+ /*
+ * Return the external diff command named by GIT_EXTERNAL_DIFF, or NULL
+ * when that variable is not set. The environment is consulted on the
+ * first call only; that call also replaces diff_opts with the value of
+ * GIT_DIFF_OPTS when it is set (for example GIT_DIFF_OPTS="-c").
+ */
+ static const char *external_diff(void)
+ {
+ 	static const char *external_diff_cmd = NULL;
+ 	static int done_preparing = 0;
+
+ 	if (!done_preparing) {
+ 		const char *env;
+
+ 		env = gitenv("GIT_EXTERNAL_DIFF");
+ 		if (env)
+ 			external_diff_cmd = env;
+
+ 		/* In case external diff fails... */
+ 		env = gitenv("GIT_DIFF_OPTS");
+ 		if (env)
+ 			diff_opts = env;
+
+ 		done_preparing = 1;
+ 	}
+ 	return external_diff_cmd;
+ }
+
+ /*
+ * Description of one side of a diff as handed to the diff command.
+ * diff_temp[0] and diff_temp[1] are the two sides.
+ */
+ static struct diff_tempfile {
+ const char *name; /* filename external diff should read from */
+ char hex[41];
+ char mode[10]; /* mode as text; printed in the "mode" header lines */
+ char tmp_path[50];
+ } diff_temp[2];
+
+ /*
+ * Count the lines in filename.
+ * An empty file has 0 lines; a last line without a trailing newline
+ * still counts as a line. Reports the error with perror() and exits
+ * with status 1 when the file cannot be opened.
+ */
+ static int count_lines(const char *filename)
+ {
+ 	FILE *in;
+ 	int count, ch, completely_empty = 1, nl_just_seen = 0;
+
+ 	in = fopen(filename, "r");
+ 	if (!in) {
+ 		/* fgetc() on a NULL stream would crash; fail loudly instead */
+ 		perror(filename);
+ 		exit(1);
+ 	}
+ 	count = 0;
+ 	while ((ch = fgetc(in)) != EOF) {
+ 		completely_empty = 0;
+ 		if (ch == '\n') {
+ 			count++;
+ 			nl_just_seen = 1;
+ 		}
+ 		else
+ 			nl_just_seen = 0;
+ 	}
+ 	fclose(in);
+ 	if (completely_empty)
+ 		return 0;
+ 	if (!nl_just_seen)
+ 		count++; /* no trailing newline */
+ 	return count;
+ }
+
+ /*
+ * Print the range part of a hunk header for a file of count lines
+ * that starts at line 1: "0,0" for no lines, "1" for a single line,
+ * "1,<count>" otherwise.
+ */
+ static void print_line_count(int count)
+ {
+ 	if (count == 0)
+ 		printf("0,0");
+ 	else if (count == 1)
+ 		printf("1");
+ 	else
+ 		printf("1,%d", count);
+ }
+
+ /*
+ * Copy filename to stdout, putting the character prefix in front of
+ * every line. When the file does not end in a newline, one is added,
+ * followed by the "\ No newline at end of file" marker. Reports the
+ * error with perror() and exits with status 1 when the file cannot be
+ * opened.
+ */
+ static void copy_file(int prefix, const char *filename)
+ {
+ 	FILE *in;
+ 	int ch, nl_just_seen = 1;
+
+ 	in = fopen(filename, "r");
+ 	if (!in) {
+ 		/* fgetc() on a NULL stream would crash; fail loudly instead */
+ 		perror(filename);
+ 		exit(1);
+ 	}
+ 	while ((ch = fgetc(in)) != EOF) {
+ 		if (nl_just_seen)
+ 			putchar(prefix);
+ 		putchar(ch);
+ 		nl_just_seen = (ch == '\n');
+ 	}
+ 	fclose(in);
+ 	if (!nl_just_seen)
+ 		printf("\n\\ No newline at end of file\n");
+ }
+
+ /*
+ * Print a diff that removes every line of temp[0].name and adds every
+ * line of temp[1].name as one hunk. name_a and name_b are used only
+ * as the labels on the "---" and "+++" lines.
+ */
+ static void emit_rewrite_diff(const char *name_a,
+ 			      const char *name_b,
+ 			      struct diff_tempfile *temp)
+ {
+ 	int old_lines = count_lines(temp[0].name);
+ 	int new_lines = count_lines(temp[1].name);
+
+ 	printf("--- %s\n+++ %s\n@@ -", name_a, name_b);
+ 	print_line_count(old_lines);
+ 	printf(" +");
+ 	print_line_count(new_lines);
+ 	printf(" @@\n");
+
+ 	if (old_lines)
+ 		copy_file('-', temp[0].name);
+ 	if (new_lines)
+ 		copy_file('+', temp[1].name);
+ }
+
+ /*
+ * Print the "diff --git" header for one file pair and then replace the
+ * process with "sh -c" running the diff command on the two files in
+ * temp[]. A side whose temp name is "/dev/null" marks a created or
+ * deleted file. xfrm_msg, when non-empty, is printed after the mode
+ * lines.
+ *
+ * The function exits with status 0 without running diff when the first
+ * three characters of the two mode strings differ, or after
+ * emit_rewrite_diff() when complete_rewrite is set.
+ * NOTE(review): the result of execlp() is not checked, so the function
+ * simply returns if the exec fails.
+ */
+ static void builtin_diff(const char *name_a,
+ const char *name_b,
+ struct diff_tempfile *temp,
+ const char *xfrm_msg,
+ int complete_rewrite)
+ {
+ int i, next_at, cmd_size;
+ const char *diff_cmd = "diff -L%s%s -L%s%s";
+ const char *diff_arg = "%s %s||:"; /* "||:" is to return 0 */
+ const char *input_name_sq[2];
+ const char *path0[2];
+ const char *path1[2];
+ const char *name_sq[2];
+ char *cmd;
+
+ name_sq[0] = sq_quote(name_a);
+ name_sq[1] = sq_quote(name_b);
+
+ /* diff_cmd and diff_arg have 6 %s in total which makes
+ * the sum of these strings 12 bytes larger than required.
+ * we use 2 spaces around diff-opts, and we need to count
+ * terminating NUL, so we subtract 9 here.
+ */
+ cmd_size = (strlen(diff_cmd) + strlen(diff_opts) +
+ strlen(diff_arg) - 9);
+ for (i = 0; i < 2; i++) {
+ input_name_sq[i] = sq_quote(temp[i].name);
+ /* label is "/dev/null" alone, or "a/" / "b/" plus the quoted name */
+ if (!strcmp(temp[i].name, "/dev/null")) {
+ path0[i] = "/dev/null";
+ path1[i] = "";
+ } else {
+ path0[i] = i ? "b/" : "a/";
+ path1[i] = name_sq[i];
+ }
+ cmd_size += (strlen(path0[i]) + strlen(path1[i]) +
+ strlen(input_name_sq[i]));
+ }
+
+ cmd = xmalloc(cmd_size);
+
+ /* Assemble: diff -L<label a> -L<label b> <opts> <file a> <file b>||: */
+ next_at = 0;
+ next_at += snprintf(cmd+next_at, cmd_size-next_at,
+ diff_cmd,
+ path0[0], path1[0], path0[1], path1[1]);
+ next_at += snprintf(cmd+next_at, cmd_size-next_at,
+ " %s ", diff_opts);
+ next_at += snprintf(cmd+next_at, cmd_size-next_at,
+ diff_arg, input_name_sq[0], input_name_sq[1]);
+
+ printf("diff --git a/%s b/%s\n", name_a, name_b);
+ /* an empty path1[] means that side is /dev/null */
+ if (!path1[0][0]) {
+ printf("new file mode %s\n", temp[1].mode);
+ if (xfrm_msg && xfrm_msg[0])
+ puts(xfrm_msg);
+ }
+ else if (!path1[1][0]) {
+ printf("deleted file mode %s\n", temp[0].mode);
+ if (xfrm_msg && xfrm_msg[0])
+ puts(xfrm_msg);
+ }
+ else {
+ if (strcmp(temp[0].mode, temp[1].mode)) {
+ printf("old mode %s\n", temp[0].mode);
+ printf("new mode %s\n", temp[1].mode);
+ }
+ if (xfrm_msg && xfrm_msg[0])
+ puts(xfrm_msg);
+ if (strncmp(temp[0].mode, temp[1].mode, 3))
+ /* we do not run diff between different kind
+ * of objects.
+ */
+ exit(0);
+ if (complete_rewrite) {
+ fflush(NULL);
+ emit_rewrite_diff(name_a, name_b, temp);
+ exit(0);
+ }
+ }
+ /* flush our own output before the exec'd command writes its own */
+ fflush(NULL);
+ execlp("/bin/sh","sh", "-c", cmd, NULL);
+ }
+
+ /*
+ * Allocate a diff_filespec for path. The path string is stored in the
+ * same allocation, directly after the structure, so the whole thing is
+ * released with a single free(). Every other field starts out zeroed,
+ * including sha1_valid, which fill_filespec() only sets for a non-zero
+ * mode.
+ */
+ struct diff_filespec *alloc_filespec(const char *path)
+ {
+ 	int namelen = strlen(path);
+ 	struct diff_filespec *spec = xmalloc(sizeof(*spec) + namelen + 1);
+
+ 	/* clear the whole structure so no field is left indeterminate */
+ 	memset(spec, 0, sizeof(*spec));
+ 	spec->data = NULL;
+ 	spec->path = (char *)(spec + 1);
+ 	memcpy(spec->path, path, namelen + 1);
+ 	return spec;
+ }
+
+ /*
+ * Record mode and sha1 in spec. A zero mode leaves spec untouched.
+ * The mode is stored after DIFF_FILE_CANON_MODE(); sha1_valid is set
+ * unless sha1 is all zeros.
+ */
+ void fill_filespec(struct diff_filespec *spec, const unsigned char *sha1,
+ 		   unsigned short mode)
+ {
+ 	if (!mode)
+ 		return;
+
+ 	spec->mode = DIFF_FILE_CANON_MODE(mode);
+ 	memcpy(spec->sha1, sha1, 20);
+ 	spec->sha1_valid = !!memcmp(sha1, null_sha1, 20);
+ }
+
+/*
+ * Given a name and sha1 pair, if the dircache tells us the file in
+ * the work tree has that object contents, return true, so that
+ * prepare_temp_file() does not have to inflate and extract.
+ */
+static int work_tree_matches(const char *name, const unsigned char *sha1)
+{
+ struct cache_entry *ce;
+ struct stat st;
+ int pos, len;
+
+ /* We do not read the cache ourselves here, because the
+ * benchmark with my previous version that always reads cache
+ * shows that it makes things worse for diff-tree comparing
+ * two linux-2.6 kernel trees in an already checked out work
+ * tree. This is because most diff-tree comparisons deal with
+ * only a small number of files, while reading the cache is
+ * expensive for a large project, and its cost outweighs the
+ * savings we get by not inflating the object to a temporary
+ * file. Practically, this code only helps when we are used
+ * by diff-cache --cached, which does read the cache before
+ * calling us.
+ */
+ if (!active_cache)
+ return 0;
+
+ len = strlen(name);
+ pos = cache_name_pos(name, len);
+ if (pos < 0)
+ return 0;
+ ce = active_cache[pos];
+ if ((lstat(name, &st) < 0) ||
+ !S_ISREG(st.st_mode) || /* careful! */
+ ce_match_stat(ce, &st) ||
+ memcmp(sha1, ce->sha1, 20))
+ return 0;
+ /* we return 1 only when we can stat, it is a regular file,
+ * stat information matches, and sha1 recorded in the cache
+ * matches. I.e. we know the file in the work tree really is
+ * the same as the <name, sha1> pair.
+ */
+ return 1;
+}
+
+ /*
+ * Table associating a sha1 with a size. Entries are allocated one by
+ * one, and looked up and inserted by locate_size_cache().
+ */
+ static struct sha1_size_cache {
+ unsigned char sha1[20];
+ unsigned long size;
+ } **sha1_size_cache;
+ /* number of entries in use / number of slots allocated */
+ static int sha1_size_cache_nr, sha1_size_cache_alloc;
+
+ /*
+ * Look sha1 up in the size cache with a binary search.
+ * Returns the existing entry when there is one; its size is left as
+ * it was. Otherwise returns NULL if find_only is set, or inserts and
+ * returns a new entry holding size. The table is kept in the order
+ * this search expects: entries whose sha1 compares greater sit at
+ * lower indices.
+ */
+ static struct sha1_size_cache *locate_size_cache(unsigned char *sha1,
+ int find_only,
+ unsigned long size)
+ {
+ int first, last;
+ struct sha1_size_cache *e;
+
+ first = 0;
+ last = sha1_size_cache_nr;
+ while (last > first) {
+ int cmp, next = (last + first) >> 1;
+ e = sha1_size_cache[next];
+ cmp = memcmp(e->sha1, sha1, 20);
+ if (!cmp)
+ return e;
+ /* entry sorts below the key: keep looking at lower indices */
+ if (cmp < 0) {
+ last = next;
+ continue;
+ }
+ first = next+1;
+ }
+ /* not found */
+ if (find_only)
+ return NULL;
+ /* insert to make it at "first" */
+ if (sha1_size_cache_alloc <= sha1_size_cache_nr) {
+ sha1_size_cache_alloc = alloc_nr(sha1_size_cache_alloc);
+ sha1_size_cache = xrealloc(sha1_size_cache,
+ sha1_size_cache_alloc *
+ sizeof(*sha1_size_cache));
+ }
+ sha1_size_cache_nr++;
+ /* shift the tail up by one slot to open a gap at "first" */
+ if (first < sha1_size_cache_nr)
+ memmove(sha1_size_cache + first + 1, sha1_size_cache + first,
+ (sha1_size_cache_nr - first - 1) *
+ sizeof(*sha1_size_cache));
+ e = xmalloc(sizeof(struct sha1_size_cache));
+ sha1_size_cache[first] = e;
+ memcpy(e->sha1, sha1, 20);
+ e->size = size;
+ return e;
+ }
+
+/*
+ * While doing rename detection and pickaxe operation, we may need to
+ * grab the data for the blob (or file) for our own in-core comparison.
+ * diff_filespec has data and size fields for this purpose.
+ *
+ * Fills s->size and, unless size_only is in effect, s->data, taking
+ * the contents from the work tree when the sha1 is not valid or the
+ * work tree file is known to match, and from the object database
+ * otherwise.  size_only is honored only while use_size_cache is set.
+ *
+ * Returns 0 on success and -1 on failure (directories always fail).
+ * When reading fails, s->data points at an empty string and s->size
+ * is zero, with neither should_free nor should_munmap set, so that
+ * diff_free_filespec_data() remains safe to call.
+ */
+int diff_populate_filespec(struct diff_filespec *s, int size_only)
+{
+	int err = 0;
+	char type[20];
+
+	if (!DIFF_FILE_VALID(s))
+		die("internal error: asking to populate invalid file.");
+	if (S_ISDIR(s->mode))
+		return -1;
+
+	if (!use_size_cache)
+		size_only = 0;
+
+	if (s->data)
+		return err;	/* already populated */
+
+	if (!s->sha1_valid ||
+	    work_tree_matches(s->path, s->sha1)) {
+		struct stat st;
+		void *map;
+		int fd;
+
+		/*
+		 * After any lstat() failure "st" holds nothing usable,
+		 * whatever errno says, so every failure is an error.
+		 */
+		if (lstat(s->path, &st) < 0)
+			goto err_empty;
+		s->size = st.st_size;
+		if (!s->size)
+			goto empty;
+		if (size_only)
+			return 0;
+		if (S_ISLNK(st.st_mode)) {
+			char *target = xmalloc(s->size);
+
+			if (readlink(s->path, target, s->size) < 0) {
+				/* do not leave should_free behind for "" */
+				free(target);
+				goto err_empty;
+			}
+			s->data = target;
+			s->should_free = 1;
+			return 0;
+		}
+		fd = open(s->path, O_RDONLY);
+		if (fd < 0)
+			goto err_empty;
+		map = mmap(NULL, s->size, PROT_READ, MAP_PRIVATE, fd, 0);
+		close(fd);
+		if (map == MAP_FAILED)
+			goto err_empty;
+		s->data = map;
+		s->should_munmap = 1;
+		return 0;
+	}
+
+	if (size_only) {
+		struct sha1_size_cache *e;
+
+		e = locate_size_cache(s->sha1, 1, 0);
+		if (e) {
+			s->size = e->size;
+			return 0;
+		}
+		if (sha1_object_info(s->sha1, type, &s->size))
+			return -1;	/* size could not be determined */
+		locate_size_cache(s->sha1, 0, s->size);
+		return 0;
+	}
+
+	s->data = read_sha1_file(s->sha1, type, &s->size);
+	if (!s->data)
+		goto err_empty;
+	s->should_free = 1;
+	return 0;
+
+err_empty:
+	err = -1;
+empty:
+	s->data = "";
+	s->size = 0;
+	return err;
+}
+
+/*
+ * Release the buffer attached to s->data with free() or munmap(),
+ * whichever the should_free / should_munmap flags call for, then
+ * clear the flags and the data pointer.
+ */
+void diff_free_filespec_data(struct diff_filespec *s)
+{
+	if (s->should_free) {
+		free(s->data);
+	} else if (s->should_munmap) {
+		munmap(s->data, s->size);
+	}
+
+	s->data = NULL;
+	s->should_munmap = 0;
+	s->should_free = 0;
+}
+
+/*
+ * Write "size" bytes of "blob" into a freshly created temporary file
+ * and describe it in "temp": its name, the hex form of sha1 and the
+ * mode formatted as six octal digits.  Dies when the file cannot be
+ * created or written completely.
+ */
+static void prep_temp_blob(struct diff_tempfile *temp,
+			   void *blob,
+			   unsigned long size,
+			   unsigned char *sha1,
+			   int mode)
+{
+	int out;
+
+	strcpy(temp->tmp_path, ".diff_XXXXXX");
+	out = mkstemp(temp->tmp_path);
+	if (out < 0)
+		die("unable to create temp-file");
+	if (write(out, blob, size) != size)
+		die("unable to write temp-file");
+	close(out);
+
+	sprintf(temp->mode, "%06o", mode);
+	strcpy(temp->hex, sha1_to_hex(sha1));
+	temp->hex[40] = 0;
+	/* name == tmp_path is how remove_tempfile() recognizes our files */
+	temp->name = temp->tmp_path;
+}
+
+/*
+ * Make "temp" describe a file holding the contents of "one" so that it
+ * can be handed to a diff command:
+ *
+ *  - an invalid filespec, or a work tree file that has disappeared,
+ *    becomes /dev/null with "." for both hex and mode;
+ *  - when the sha1 is not valid or the work tree file is known to
+ *    match, a regular work tree file is borrowed in place, while a
+ *    symlink has its target written to a temporary file;
+ *  - otherwise the blob is read and written to a temporary file.
+ *
+ * Dies when the work tree file or the blob cannot be read.
+ */
+static void prepare_temp_file(const char *name,
+			      struct diff_tempfile *temp,
+			      struct diff_filespec *one)
+{
+	struct stat st;
+
+	if (!DIFF_FILE_VALID(one))
+		goto not_a_valid_file;
+
+	if (one->sha1_valid && !work_tree_matches(name, one->sha1)) {
+		if (diff_populate_filespec(one, 0))
+			die("cannot read data blob for %s", one->path);
+		prep_temp_blob(temp, one->data, one->size,
+			       one->sha1, one->mode);
+		return;
+	}
+
+	if (lstat(name, &st) < 0) {
+		if (errno == ENOENT)
+			goto not_a_valid_file;
+		die("stat(%s): %s", name, strerror(errno));
+	}
+	if (S_ISLNK(st.st_mode)) {
+		char buf_[1024];
+		char *buf = buf_;
+		int ret;
+
+		if (sizeof(buf_) < st.st_size)
+			buf = xmalloc(st.st_size);
+		ret = readlink(name, buf, st.st_size);
+		if (ret < 0)
+			die("readlink(%s)", name);
+		/*
+		 * Write what readlink() actually stored; it may be
+		 * shorter than st_size if the link changed meanwhile.
+		 */
+		prep_temp_blob(temp, buf, ret,
+			       (one->sha1_valid ?
+				one->sha1 : null_sha1),
+			       (one->sha1_valid ?
+				one->mode : S_IFLNK));
+		if (buf != buf_)
+			free(buf);	/* was leaked for long targets */
+		return;
+	}
+
+	/* we can borrow from the file in the work tree */
+	temp->name = name;
+	if (!one->sha1_valid)
+		strcpy(temp->hex, sha1_to_hex(null_sha1));
+	else
+		strcpy(temp->hex, sha1_to_hex(one->sha1));
+	/*
+	 * Even though we may sometimes borrow the contents from the
+	 * work tree, we always want one->mode.  mode is trustworthy
+	 * even when !(one->sha1_valid), as long as
+	 * DIFF_FILE_VALID(one).
+	 */
+	sprintf(temp->mode, "%06o", one->mode);
+	return;
+
+not_a_valid_file:
+	/*
+	 * A '-' entry produces this for file-2, and
+	 * a '+' entry produces this for file-1.
+	 */
+	temp->name = "/dev/null";
+	strcpy(temp->hex, ".");
+	strcpy(temp->mode, ".");
+}
+
+/*
+ * Unlink every entry of diff_temp[] whose name points at its own
+ * tmp_path, i.e. the temporary files we created, and forget the name.
+ */
+static void remove_tempfile(void)
+{
+	struct diff_tempfile *t;
+
+	for (t = diff_temp; t < diff_temp + 2; t++) {
+		if (t->name != t->tmp_path)
+			continue;
+		unlink(t->name);
+		t->name = NULL;
+	}
+}
+
+/*
+ * Signal handler: remove our temporary files, then restore the default
+ * disposition and re-raise the signal so that the process still
+ * terminates the way the sender intended.  Returning from the handler
+ * alone would swallow the interrupt and let the diff session go on.
+ */
+static void remove_tempfile_on_signal(int signo)
+{
+	remove_tempfile();
+	signal(signo, SIG_DFL);
+	raise(signo);
+}
+
+/* An external diff command takes:
+ *
+ *  diff-cmd name infile1 infile1-sha1 infile1-mode \
+ *               infile2 infile2-sha1 infile2-mode [ rename-to xfrm-msg ]
+ *
+ * The work is done in a forked child.  With a "pgm" the child execs it;
+ * without one (or if the exec returns) it falls back to the built-in
+ * diff.  When "one" and "two" are NULL the path is unmerged: an external
+ * command gets only the name, the built-in code prints a one-line
+ * notice.  The parent waits for the child, removes the temporary files
+ * and exits with status 1 when the child did not exit with status 0.
+ */
+static void run_external_diff(const char *pgm,
+			      const char *name,
+			      const char *other,
+			      struct diff_filespec *one,
+			      struct diff_filespec *two,
+			      const char *xfrm_msg,
+			      int complete_rewrite)
+{
+	struct diff_tempfile *temp = diff_temp;
+	pid_t pid;
+	int status;
+	static int atexit_asked = 0;
+
+	if (one && two) {
+		prepare_temp_file(name, &temp[0], one);
+		prepare_temp_file(other ? : name, &temp[1], two);
+		if (! atexit_asked &&
+		    (temp[0].name == temp[0].tmp_path ||
+		     temp[1].name == temp[1].tmp_path)) {
+			atexit_asked = 1;
+			atexit(remove_tempfile);
+		}
+		signal(SIGINT, remove_tempfile_on_signal);
+	}
+
+	fflush(NULL);
+	pid = fork();
+	if (pid < 0)
+		die("unable to fork");
+	if (!pid) {
+		if (pgm) {
+			if (one && two) {
+				/*
+				 * Up to ten arguments (pgm, name, three
+				 * per side, other, xfrm_msg) plus the
+				 * terminating NULL: eleven slots.
+				 */
+				const char *exec_arg[11];
+				const char **arg = &exec_arg[0];
+				*arg++ = pgm;
+				*arg++ = name;
+				*arg++ = temp[0].name;
+				*arg++ = temp[0].hex;
+				*arg++ = temp[0].mode;
+				*arg++ = temp[1].name;
+				*arg++ = temp[1].hex;
+				*arg++ = temp[1].mode;
+				if (other) {
+					*arg++ = other;
+					*arg++ = xfrm_msg;
+				}
+				*arg = NULL;
+				execvp(pgm, (char *const*) exec_arg);
+			}
+			else
+				execlp(pgm, pgm, name, (char *) NULL);
+		}
+		/*
+		 * otherwise we use the built-in one.
+		 */
+		if (one && two)
+			builtin_diff(name, other ? : name, temp, xfrm_msg,
+				     complete_rewrite);
+		else
+			printf("* Unmerged path %s\n", name);
+		exit(0);
+	}
+	if (waitpid(pid, &status, 0) < 0 ||
+	    !WIFEXITED(status) || WEXITSTATUS(status)) {
+		/* Earlier we did not check the exit status because
+		 * diff exits non-zero if files are different, and
+		 * we are not interested in knowing that.  It was a
+		 * mistake which made it harder to quit a diff-*
+		 * session that uses the git-apply-patch-script as
+		 * the GIT_EXTERNAL_DIFF.  A custom GIT_EXTERNAL_DIFF
+		 * should also exit non-zero only when it wants to
+		 * abort the entire diff-* session.
+		 */
+		remove_tempfile();
+		fprintf(stderr, "external diff died, stopping at %s.\n", name);
+		exit(1);
+	}
+	remove_tempfile();
+}
+
+/*
+ * Produce the patch for one filepair.  Unmerged pairs are reported by
+ * path only.  Copies, renames and scored modifications get an extra
+ * header message.  When the built-in diff is in use, a pair whose two
+ * sides differ in file type is shown as a deletion followed by a
+ * creation.
+ */
+static void run_diff(struct diff_filepair *p)
+{
+	const char *pgm = external_diff();
+	char msg_[PATH_MAX*2+200];
+	char *xfrm_msg = NULL;
+	struct diff_filespec *one, *two;
+	const char *name, *other;
+	int complete_rewrite = 0;
+	int percent;
+
+	if (DIFF_PAIR_UNMERGED(p)) {
+		/* unmerged */
+		run_external_diff(pgm, p->one->path, NULL, NULL, NULL, NULL,
+				  0);
+		return;
+	}
+
+	one = p->one;
+	two = p->two;
+	name = one->path;
+	other = strcmp(name, two->path) ? two->path : NULL;
+	percent = (int)(0.5 + p->score * 100.0/MAX_SCORE);
+
+	if (p->status == DIFF_STATUS_COPIED) {
+		sprintf(msg_,
+			"similarity index %d%%\n"
+			"copy from %s\n"
+			"copy to %s",
+			percent, name, other);
+		xfrm_msg = msg_;
+	}
+	else if (p->status == DIFF_STATUS_RENAMED) {
+		sprintf(msg_,
+			"similarity index %d%%\n"
+			"rename from %s\n"
+			"rename to %s",
+			percent, name, other);
+		xfrm_msg = msg_;
+	}
+	else if (p->status == DIFF_STATUS_MODIFIED && p->score) {
+		sprintf(msg_,
+			"dissimilarity index %d%%",
+			percent);
+		xfrm_msg = msg_;
+		complete_rewrite = 1;
+	}
+
+	if (pgm ||
+	    !DIFF_FILE_VALID(one) || !DIFF_FILE_VALID(two) ||
+	    (S_IFMT & one->mode) == (S_IFMT & two->mode)) {
+		run_external_diff(pgm, name, other, one, two, xfrm_msg,
+				  complete_rewrite);
+		return;
+	}
+
+	/*
+	 * a filepair that changes between file and symlink
+	 * needs to be split into deletion and creation.
+	 */
+	{
+		struct diff_filespec *null = alloc_filespec(two->path);
+		run_external_diff(NULL, name, other, one, null, xfrm_msg, 0);
+		free(null);
+		null = alloc_filespec(one->path);
+		run_external_diff(NULL, name, other, null, two, xfrm_msg, 0);
+		free(null);
+	}
+}
+
+/*
+ * Apply the DIFF_SETUP_* bits in flags: turn on reverse diff, read the
+ * cache if it has not been read yet, and enable the size cache.
+ */
+void diff_setup(int flags)
+{
+	if (flags & DIFF_SETUP_REVERSE)
+		reverse_diff = 1;
+
+	/*
+	 * read-cache does not die even when it fails, so it is safe
+	 * for us to call it here.  It also does not smudge
+	 * active_cache or active_nr when it fails, so we do not have
+	 * to worry about cleaning up ourselves either.
+	 */
+	if ((flags & DIFF_SETUP_USE_CACHE) && !active_cache)
+		read_cache();
+
+	if (flags & DIFF_SETUP_USE_SIZE_CACHE)
+		use_size_cache = 1;
+}
+
+/*
+ * Parse a run of decimal digits at *cp_p as the fraction "0.<digits>"
+ * and return it scaled to MAX_SCORE, leaving *cp_p just past the
+ * digits.  Only the first five digits contribute; the rest are
+ * skipped.  With no digits at all the result is 0.
+ */
+static int parse_num(const char **cp_p)
+{
+	int num = 0, scale = 1, cnt = 0, ch;
+	const char *cp = *cp_p;
+
+	while ('0' <= (ch = *cp) && ch <= '9') {
+		if (cnt++ < 5) {
+			/* We simply ignore more than 5 digits precision. */
+			scale *= 10;
+			num = num * 10 + ch - '0';
+		}
+		cp++;
+	}
+	*cp_p = cp;
+
+	/* user says num divided by scale and we say internally that
+	 * is MAX_SCORE * num / scale.
+	 */
+	return (MAX_SCORE * num / scale);
+}
+
+/*
+ * Parse a -M<num>, -C<num>, -B<num> or -B<num>/<num> option.  Returns
+ * the first number in the low 16 bits and the second (only allowed
+ * with -B) shifted left by 16, or -1 when opt is not such an option.
+ */
+int diff_scoreopt_parse(const char *opt)
+{
+	int first, second = 0;
+	int cmd;
+
+	if (opt[0] != '-')
+		return -1;
+	cmd = opt[1];
+	switch (cmd) {
+	case 'M':
+	case 'C':
+	case 'B':
+		break;
+	default:
+		return -1; /* that is not a -M, -C nor -B option */
+	}
+	opt += 2;
+
+	first = parse_num(&opt);
+	if (cmd == 'B' && *opt == '/') {
+		/* we expect -B80/99 or -B80 */
+		opt++;
+		second = parse_num(&opt);
+	}
+	if (*opt != 0)
+		return -1;
+	return first | (second << 16);
+}
+
+struct diff_queue_struct diff_queued_diff;
+
+/*
+ * Append filepair dp to the end of queue, growing the array when it
+ * is full.
+ */
+void diff_q(struct diff_queue_struct *queue, struct diff_filepair *dp)
+{
+	if (queue->nr >= queue->alloc) {
+		queue->alloc = alloc_nr(queue->alloc);
+		queue->queue = xrealloc(queue->queue,
+					sizeof(dp) * queue->alloc);
+	}
+	queue->queue[queue->nr] = dp;
+	queue->nr++;
+}
+
+/*
+ * Allocate a filepair for <one, two> with score, status and flags all
+ * cleared, append it to queue and return it.
+ */
+struct diff_filepair *diff_queue(struct diff_queue_struct *queue,
+				 struct diff_filespec *one,
+				 struct diff_filespec *two)
+{
+	struct diff_filepair *pair = xmalloc(sizeof(*pair));
+
+	pair->one = one;
+	pair->two = two;
+	pair->status = 0;
+	pair->score = 0;
+	pair->broken_pair = 0;
+	pair->source_stays = 0;
+
+	diff_q(queue, pair);
+	return pair;
+}
+
+/*
+ * Drop the populated data of both sides and free the pair itself.
+ * The two diff_filespec structures are not freed here.
+ */
+void diff_free_filepair(struct diff_filepair *p)
+{
+	struct diff_filespec *src = p->one;
+	struct diff_filespec *dst = p->two;
+
+	diff_free_filespec_data(src);
+	diff_free_filespec_data(dst);
+	free(p);
+}
+
+/*
+ * Print one filepair in diff-raw format:
+ *
+ *   :<mode1> <mode2> <sha1-1> <sha1-2> <status><sep><path1>[<sep><path2>]<term>
+ *
+ * The second path is shown only for copies and renames.  Unless lines
+ * are NUL terminated, a path containing either the line terminator or
+ * the name separator cannot be expressed and is fatal.
+ */
+static void diff_flush_raw(struct diff_filepair *p,
+			   int line_termination,
+			   int inter_name_termination)
+{
+	const struct diff_filespec *one = p->one;
+	const struct diff_filespec *two = p->two;
+	char status[10];
+	int two_paths = (p->status == DIFF_STATUS_COPIED ||
+			 p->status == DIFF_STATUS_RENAMED);
+
+	if (line_termination) {
+		const char *err = "path %s cannot be expressed without -z";
+
+		if (strchr(one->path, line_termination) ||
+		    strchr(one->path, inter_name_termination))
+			die(err, one->path);
+		if (strchr(two->path, line_termination) ||
+		    strchr(two->path, inter_name_termination))
+			die(err, two->path);
+	}
+
+	if (!p->score) {
+		status[0] = p->status;
+		status[1] = 0;
+	}
+	else
+		sprintf(status, "%c%03d", p->status,
+			(int)(0.5 + p->score * 100.0/MAX_SCORE));
+
+	/*
+	 * The two sha1_to_hex() results are printed by separate
+	 * printf() calls, as in the original; presumably the helper
+	 * returns a shared buffer -- confirm before merging them.
+	 */
+	printf(":%06o %06o %s ",
+	       one->mode, two->mode, sha1_to_hex(one->sha1));
+	printf("%s %s%c%s",
+	       sha1_to_hex(two->sha1),
+	       status,
+	       inter_name_termination,
+	       one->path);
+	if (two_paths)
+		printf("%c%s", inter_name_termination, two->path);
+	putchar(line_termination);
+}
+
+/* Print only the path of the second side, followed by the terminator. */
+static void diff_flush_name(struct diff_filepair *p,
+			    int line_termination)
+{
+	fputs(p->two->path, stdout);
+	putchar(line_termination);
+}
+
+/*
+ * Return 1 when the filepair records no change at all, 0 when it is
+ * interesting.
+ *
+ * This is written stricter than the currently implemented
+ * transformers need; the idea is to let transformers produce
+ * diff_filepairs any way they want, and to filter and clean them up
+ * here before producing the output.
+ */
+int diff_unmodified_pair(struct diff_filepair *p)
+{
+	struct diff_filespec *one = p->one;
+	struct diff_filespec *two = p->two;
+
+	if (DIFF_PAIR_UNMERGED(p))
+		return 0; /* unmerged is interesting */
+
+	/* deletion, addition, mode or type change
+	 * and rename are all interesting.
+	 */
+	if (DIFF_FILE_VALID(one) != DIFF_FILE_VALID(two))
+		return 0;
+	if (DIFF_PAIR_MODE_CHANGED(p))
+		return 0;
+	if (strcmp(one->path, two->path))
+		return 0;
+
+	/* both are valid and point at the same path.  that is, we are
+	 * dealing with a change.
+	 */
+	if (one->sha1_valid && two->sha1_valid)
+		/* no change when both name the same object */
+		return !memcmp(one->sha1, two->sha1, sizeof(one->sha1));
+
+	/* both look at the same file on the filesystem? */
+	return !one->sha1_valid && !two->sha1_valid;
+}
+
+/*
+ * Show one filepair in patch format.  Unmodified pairs and pairs
+ * with a directory on either side produce nothing.
+ */
+static void diff_flush_patch(struct diff_filepair *p)
+{
+	int one_is_tree, two_is_tree;
+
+	if (diff_unmodified_pair(p))
+		return;
+
+	one_is_tree = DIFF_FILE_VALID(p->one) && S_ISDIR(p->one->mode);
+	two_is_tree = DIFF_FILE_VALID(p->two) && S_ISDIR(p->two->mode);
+
+	/* no tree diffs in patch format */
+	if (!one_is_tree && !two_is_tree)
+		run_diff(p);
+}
+
+/*
+ * Return 1 when diff_queued_diff holds nothing but unmodified pairs
+ * (or nothing at all), 0 otherwise.
+ */
+int diff_queue_is_empty(void)
+{
+	struct diff_queue_struct *q = &diff_queued_diff;
+	int i = 0;
+
+	while (i < q->nr) {
+		if (!diff_unmodified_pair(q->queue[i]))
+			return 0;
+		i++;
+	}
+	return 1;
+}
+
+#if DIFF_DEBUG
+/*
+ * Dump one filespec to stderr.  "x" is the queue index to print and
+ * "one" a label for the side being shown (may be NULL).
+ */
+void diff_debug_filespec(struct diff_filespec *s, int x, const char *one)
+{
+	const char *label = one ? one : "";
+	const char *validity = DIFF_FILE_VALID(s) ? "valid" : "invalid";
+	const char *hex = s->sha1_valid ? sha1_to_hex(s->sha1) : "";
+
+	fprintf(stderr, "queue[%d] %s (%s) %s %06o %s\n",
+		x, label, s->path, validity, s->mode, hex);
+	fprintf(stderr, "queue[%d] %s size %lu flags %d\n",
+		x, label, s->size, s->xfrm_flags);
+}
+
+/* Dump both sides of filepair p, then its score, status and flags. */
+void diff_debug_filepair(const struct diff_filepair *p, int i)
+{
+	int letter = p->status ? p->status : '?';
+
+	diff_debug_filespec(p->one, i, "one");
+	diff_debug_filespec(p->two, i, "two");
+	fprintf(stderr, "score %d, status %c stays %d broken %d\n",
+		p->score, letter,
+		p->source_stays, p->broken_pair);
+}
+
+/* Dump every filepair of q to stderr, preceded by msg when given. */
+void diff_debug_queue(const char *msg, struct diff_queue_struct *q)
+{
+	int i = 0;
+
+	if (msg)
+		fprintf(stderr, "%s\n", msg);
+	fprintf(stderr, "q->nr = %d\n", q->nr);
+	while (i < q->nr) {
+		diff_debug_filepair(q->queue[i], i);
+		i++;
+	}
+}
+#endif
+
+static void diff_resolve_rename_copy(void)
+{ /*
+ * Assign a status letter to every filepair in diff_queued_diff.
+ * Whatever status a pair carried before is discarded first.  The
+ * tests below are tried in order and the first one that applies
+ * decides the status.
+ */
+ int i, j;
+ struct diff_filepair *p, *pp;
+ struct diff_queue_struct *q = &diff_queued_diff;
+
+ diff_debug_queue("resolve-rename-copy", q);
+
+ for (i = 0; i < q->nr; i++) {
+ p = q->queue[i];
+ p->status = 0; /* undecided */
+ if (DIFF_PAIR_UNMERGED(p))
+ p->status = DIFF_STATUS_UNMERGED;
+ else if (!DIFF_FILE_VALID(p->one))
+ p->status = DIFF_STATUS_ADDED;
+ else if (!DIFF_FILE_VALID(p->two))
+ p->status = DIFF_STATUS_DELETED;
+ else if (DIFF_PAIR_TYPE_CHANGED(p))
+ p->status = DIFF_STATUS_TYPE_CHANGED;
+
+ /* from this point on, we are dealing with a pair
+ * whose both sides are valid and of the same type, i.e.
+ * either in-place edit or rename/copy edit.
+ */
+ else if (DIFF_PAIR_RENAME(p)) {
+ if (p->source_stays) {
+ p->status = DIFF_STATUS_COPIED;
+ continue;
+ }
+ /* See if there is some other filepair that
+ * copies from the same source as us. If so
+ * we are a copy. Otherwise we are a rename.
+ * Only pairs later in the queue are examined, so
+ * the last pair using a source ends up the rename.
+ */
+ for (j = i + 1; j < q->nr; j++) {
+ pp = q->queue[j];
+ if (strcmp(pp->one->path, p->one->path))
+ continue; /* not us */
+ if (!DIFF_PAIR_RENAME(pp))
+ continue; /* not a rename/copy */
+ /* pp is a rename/copy from the same source */
+ p->status = DIFF_STATUS_COPIED;
+ break;
+ }
+ if (!p->status) /* still undecided: nobody else uses our source */
+ p->status = DIFF_STATUS_RENAMED;
+ }
+ else if (memcmp(p->one->sha1, p->two->sha1, 20) ||
+ p->one->mode != p->two->mode)
+ p->status = DIFF_STATUS_MODIFIED;
+ else {
+ /* This is a "no-change" entry and should not
+ * happen anymore, but prepare for broken callers.
+ * diff_flush() skips pairs marked this way.
+ */
+ error("feeding unmodified %s to diffcore",
+ p->one->path);
+ p->status = DIFF_STATUS_UNKNOWN;
+ }
+ }
+ diff_debug_queue("resolve-rename-copy done", q);
+}
+
+/*
+ * Output every queued filepair in the requested style, then free the
+ * pairs and reset diff_queued_diff to empty.  Pairs whose status is
+ * DIFF_STATUS_UNKNOWN are skipped; a pair with no status at all is an
+ * internal error.  With a NUL line terminator the name separator is
+ * NUL as well, otherwise it is a tab.
+ */
+void diff_flush(int diff_output_style, int line_termination)
+{
+	struct diff_queue_struct *q = &diff_queued_diff;
+	int inter_name_termination = line_termination ? '\t' : 0;
+	int i;
+
+	if (diff_output_style != DIFF_FORMAT_NO_OUTPUT) {
+		for (i = 0; i < q->nr; i++) {
+			struct diff_filepair *p = q->queue[i];
+
+			if (p->status == DIFF_STATUS_UNKNOWN)
+				continue;
+			if (!p->status)
+				die("internal error in diff-resolve-rename-copy");
+
+			if (diff_output_style == DIFF_FORMAT_PATCH)
+				diff_flush_patch(p);
+			else if (diff_output_style == DIFF_FORMAT_RAW)
+				diff_flush_raw(p, line_termination,
+					       inter_name_termination);
+			else if (diff_output_style == DIFF_FORMAT_NAME)
+				diff_flush_name(p, line_termination);
+		}
+	}
+
+	for (i = 0; i < q->nr; i++)
+		diff_free_filepair(q->queue[i]);
+	free(q->queue);
+	q->queue = NULL;
+	q->nr = q->alloc = 0;
+}
+
+/*
+ * Tell whether filepair p is selected by the filter string.  A
+ * modified pair is looked up as DIFF_STATUS_FILTER_BROKEN when it
+ * carries a score and as DIFF_STATUS_MODIFIED when it does not; any
+ * other pair is looked up by its own status letter.
+ */
+static int filter_selects(const char *filter, const struct diff_filepair *p)
+{
+	if (p->status != DIFF_STATUS_MODIFIED)
+		return strchr(filter, p->status) != NULL;
+	return strchr(filter, p->score ? DIFF_STATUS_FILTER_BROKEN
+				       : DIFF_STATUS_MODIFIED) != NULL;
+}
+
+/*
+ * Restrict diff_queued_diff according to filter (NULL means no
+ * filtering).  When the filter contains DIFF_STATUS_FILTER_AON the
+ * queue is kept whole if at least one pair is selected and emptied
+ * otherwise; without it only the selected pairs are kept.
+ */
+static void diffcore_apply_filter(const char *filter)
+{
+	int i;
+	struct diff_queue_struct *q = &diff_queued_diff;
+	struct diff_queue_struct outq;
+	outq.queue = NULL;
+	outq.nr = outq.alloc = 0;
+
+	if (!filter)
+		return;
+
+	if (strchr(filter, DIFF_STATUS_FILTER_AON)) {
+		for (i = 0; i < q->nr; i++)
+			if (filter_selects(filter, q->queue[i]))
+				return;	/* one hit keeps everything */
+
+		/* otherwise we will clear the whole queue
+		 * by copying the empty outq at the end of this
+		 * function, but first clear the current entries
+		 * in the queue.
+		 */
+		for (i = 0; i < q->nr; i++)
+			diff_free_filepair(q->queue[i]);
+	}
+	else {
+		/* Only the matching ones */
+		for (i = 0; i < q->nr; i++) {
+			struct diff_filepair *p = q->queue[i];
+
+			if (filter_selects(filter, p))
+				diff_q(&outq, p);
+			else
+				diff_free_filepair(p);
+		}
+	}
+	free(q->queue);
+	*q = outq;
+}
+
+void diffcore_std(const char **paths,
+ int detect_rename, int rename_score,
+ const char *pickaxe, int pickaxe_opts,
+ int break_opt,
+ const char *orderfile,
+ const char *filter)
+{ /*
+ * Run the diffcore transformations over the queued filepairs in a
+ * fixed order.  Each stage is optional and runs only when its
+ * argument asks for it; status resolution and the filter always run
+ * last (the filter returns at once when "filter" is NULL).
+ */
+ if (paths && paths[0])
+ diffcore_pathspec(paths);
+ if (break_opt != -1) /* -1 means "do not break" */
+ diffcore_break(break_opt);
+ if (detect_rename)
+ diffcore_rename(detect_rename, rename_score);
+ if (break_opt != -1) /* re-join broken halves that survived rename detection */
+ diffcore_merge_broken();
+ if (pickaxe)
+ diffcore_pickaxe(pickaxe, pickaxe_opts);
+ if (orderfile)
+ diffcore_order(orderfile);
+ diff_resolve_rename_copy();
+ diffcore_apply_filter(filter);
+}
+
+
+void diffcore_std_no_resolve(const char **paths,
+ const char *pickaxe, int pickaxe_opts,
+ const char *orderfile,
+ const char *filter)
+{ /*
+ * Reduced pipeline: no break, rename or merge stages and, unlike
+ * diffcore_std(), no call to diff_resolve_rename_copy(), so the
+ * status already stored in each filepair is what the filter sees.
+ */
+ if (paths && paths[0])
+ diffcore_pathspec(paths);
+ if (pickaxe)
+ diffcore_pickaxe(pickaxe, pickaxe_opts);
+ if (orderfile)
+ diffcore_order(orderfile);
+ diffcore_apply_filter(filter);
+}
+
+/*
+ * Queue an addition ('+') or a removal ('-') of the path base+path
+ * with the given mode and sha1.  Any other value of addremove fills
+ * both sides with the same mode and sha1.  With reverse diff in
+ * effect '+' and '-' swap roles.  Dies when the concatenated path does
+ * not fit in PATH_MAX.
+ */
+void diff_addremove(int addremove, unsigned mode,
+		    const unsigned char *sha1,
+		    const char *base, const char *path)
+{
+	char concatpath[PATH_MAX];
+	struct diff_filespec *one, *two;
+	int len;
+
+	/* This may look odd, but it is a preparation for
+	 * feeding "there are unchanged files which should
+	 * not produce diffs, but when you are doing copy
+	 * detection you would need them, so here they are"
+	 * entries to the diff-core.  They will be prefixed
+	 * with something like '=' or '*' (I haven't decided
+	 * which but should not make any difference).
+	 * Feeding the same new and old to diff_change()
+	 * also has the same effect.
+	 * Before the final output happens, they are pruned after
+	 * merged into rename/copy pairs as appropriate.
+	 */
+	if (reverse_diff)
+		addremove = (addremove == '+' ? '-' :
+			     addremove == '-' ? '+' : addremove);
+
+	if (!path)
+		path = "";
+	/* bounded: an overlong path must not overrun the buffer */
+	len = snprintf(concatpath, sizeof(concatpath), "%s%s", base, path);
+	if (len < 0 || sizeof(concatpath) <= (size_t) len)
+		die("path too long: %s%s", base, path);
+	one = alloc_filespec(concatpath);
+	two = alloc_filespec(concatpath);
+
+	if (addremove != '+')
+		fill_filespec(one, sha1, mode);
+	if (addremove != '-')
+		fill_filespec(two, sha1, mode);
+
+	diff_queue(&diff_queued_diff, one, two);
+}
+
+/*
+ * Queue a filepair from already resolved information: modes, sha1s
+ * and paths of both sides plus a status letter and a score given in
+ * percent.  A side whose mode is zero is left unfilled.
+ */
+void diff_helper_input(unsigned old_mode,
+		       unsigned new_mode,
+		       const unsigned char *old_sha1,
+		       const unsigned char *new_sha1,
+		       const char *old_path,
+		       int status,
+		       int score,
+		       const char *new_path)
+{
+	struct diff_filespec *one = alloc_filespec(old_path);
+	struct diff_filespec *two = alloc_filespec(new_path);
+	struct diff_filepair *pair;
+
+	if (old_mode)
+		fill_filespec(one, old_sha1, old_mode);
+	if (new_mode)
+		fill_filespec(two, new_sha1, new_mode);
+
+	pair = diff_queue(&diff_queued_diff, one, two);
+	pair->status = status;
+	/* percent -> internal scale */
+	pair->score = score * MAX_SCORE / 100;
+}
+
+/*
+ * Queue a change of the path base+path from <old_mode, old_sha1> to
+ * <new_mode, new_sha1>.  With reverse diff in effect the two sides are
+ * swapped.  Dies when the concatenated path does not fit in PATH_MAX.
+ */
+void diff_change(unsigned old_mode, unsigned new_mode,
+		 const unsigned char *old_sha1,
+		 const unsigned char *new_sha1,
+		 const char *base, const char *path)
+{
+	char concatpath[PATH_MAX];
+	struct diff_filespec *one, *two;
+	int len;
+
+	if (reverse_diff) {
+		unsigned tmp;
+		const unsigned char *tmp_c;
+		tmp = old_mode; old_mode = new_mode; new_mode = tmp;
+		tmp_c = old_sha1; old_sha1 = new_sha1; new_sha1 = tmp_c;
+	}
+	if (!path)
+		path = "";
+	/* bounded: an overlong path must not overrun the buffer */
+	len = snprintf(concatpath, sizeof(concatpath), "%s%s", base, path);
+	if (len < 0 || sizeof(concatpath) <= (size_t) len)
+		die("path too long: %s%s", base, path);
+	one = alloc_filespec(concatpath);
+	two = alloc_filespec(concatpath);
+	fill_filespec(one, old_sha1, old_mode);
+	fill_filespec(two, new_sha1, new_mode);
+
+	diff_queue(&diff_queued_diff, one, two);
+}
+
+/*
+ * Queue a filepair for path with both sides left unfilled, which is
+ * how an unmerged path is fed to the diff queue.
+ */
+void diff_unmerge(const char *path)
+{
+	struct diff_filespec *lhs = alloc_filespec(path);
+	struct diff_filespec *rhs = alloc_filespec(path);
+
+	diff_queue(&diff_queued_diff, lhs, rhs);
+}
diff --git a/diff.h b/diff.h
new file mode 100644
index 0000000..3deb7fa
--- /dev/null
+++ b/diff.h
@@ -0,0 +1,101 @@
+/*
+ * Copyright (C) 2005 Junio C Hamano
+ */
+#ifndef DIFF_H
+#define DIFF_H
+
+#define DIFF_FILE_CANON_MODE(mode) \
+ (S_ISREG(mode) ? (S_IFREG | ce_permissions(mode)) : \
+ S_ISLNK(mode) ? S_IFLNK : S_IFDIR) /* regular file: S_IFREG plus ce_permissions(mode); symlink: S_IFLNK; anything else: S_IFDIR. "mode" is evaluated more than once. */
+
+extern void diff_addremove(int addremove,
+ unsigned mode,
+ const unsigned char *sha1,
+ const char *base,
+ const char *path);
+
+extern void diff_change(unsigned mode1, unsigned mode2,
+ const unsigned char *sha1,
+ const unsigned char *sha2,
+ const char *base, const char *path);
+
+extern void diff_helper_input(unsigned mode1,
+ unsigned mode2,
+ const unsigned char *sha1,
+ const unsigned char *sha2,
+ const char *path1,
+ int status,
+ int score,
+ const char *path2);
+
+extern void diff_unmerge(const char *path);
+
+extern int diff_scoreopt_parse(const char *opt);
+
+#define DIFF_SETUP_REVERSE 1
+#define DIFF_SETUP_USE_CACHE 2
+#define DIFF_SETUP_USE_SIZE_CACHE 4
+
+extern void diff_setup(int flags);
+
+#define DIFF_DETECT_RENAME 1
+#define DIFF_DETECT_COPY 2
+
+#define DIFF_PICKAXE_ALL 1
+
+extern void diffcore_std(const char **paths,
+ int detect_rename, int rename_score,
+ const char *pickaxe, int pickaxe_opts,
+ int break_opt,
+ const char *orderfile, const char *filter);
+
+extern void diffcore_std_no_resolve(const char **paths,
+ const char *pickaxe, int pickaxe_opts,
+ const char *orderfile, const char *filter);
+
+#define COMMON_DIFF_OPTIONS_HELP \
+"\ncommon diff options:\n" \
+" -r diff recursively (only meaningful in diff-tree)\n" \
+" -z output diff-raw with lines terminated with NUL.\n" \
+" -p output patch format.\n" \
+" -u synonym for -p.\n" \
+" --name-only show only names of changed files.\n" \
+" --name-only-z\n" \
+" same as --name-only but terminate lines with NUL.\n" \
+" -R swap input file pairs.\n" \
+" -B detect complete rewrites.\n" \
+" -M detect renames.\n" \
+" -C detect copies.\n" \
+" --find-copies-harder\n" \
+" try unchanged files as candidate for copy detection.\n" \
+" -O<file> reorder diffs according to the <file>.\n" \
+" -S<string> find filepair whose only one side contains the string.\n" \
+" --pickaxe-all\n" \
+" show all files diff when -S is used and hit is found.\n"
+
+extern int diff_queue_is_empty(void);
+
+#define DIFF_FORMAT_RAW 1
+#define DIFF_FORMAT_PATCH 2
+#define DIFF_FORMAT_NO_OUTPUT 3
+#define DIFF_FORMAT_NAME 4
+
+extern void diff_flush(int output_style, int line_terminator);
+
+/* diff-raw status letters */
+#define DIFF_STATUS_ADDED 'A' /* first side is not valid */
+#define DIFF_STATUS_COPIED 'C' /* rename/copy pair whose source stays or is used by another pair */
+#define DIFF_STATUS_DELETED 'D' /* second side is not valid */
+#define DIFF_STATUS_MODIFIED 'M' /* same path, sha1 or mode differs */
+#define DIFF_STATUS_RENAMED 'R' /* rename/copy pair that is the only user of its source */
+#define DIFF_STATUS_TYPE_CHANGED 'T'
+#define DIFF_STATUS_UNKNOWN 'X' /* unmodified pair fed by a broken caller; skipped on output */
+#define DIFF_STATUS_UNMERGED 'U'
+
+/* these are not diff-raw status letters proper, but used by
+ * diffcore-filter insn to specify additional restrictions.
+ * AON selects "all or none": keep the whole queue when any pair
+ * matches, otherwise drop everything.  BROKEN matches a modified
+ * pair that carries a score.
+ * NOTE(review): DIFF_STATUS_FILTER_AON has the same value as
+ * DIFF_STATUS_ADDED, so a filter asking for 'A' always takes the
+ * all-or-none path -- confirm whether that is intended.
+ */
+#define DIFF_STATUS_FILTER_AON 'A'
+#define DIFF_STATUS_FILTER_BROKEN 'B'
+
+#endif /* DIFF_H */
diff --git a/diffcore-break.c b/diffcore-break.c
new file mode 100644
index 0000000..06f9a7f
--- /dev/null
+++ b/diffcore-break.c
@@ -0,0 +1,283 @@
+/*
+ * Copyright (C) 2005 Junio C Hamano
+ */
+#include "cache.h"
+#include "diff.h"
+#include "diffcore.h"
+#include "delta.h"
+#include "count-delta.h"
+
+static int should_break(struct diff_filespec *src,
+ struct diff_filespec *dst,
+ int break_score,
+ int *merge_score_p)
+{
+ /* dst is recorded as a modification of src. Are they so
+ * different that we are better off recording this as a pair
+ * of delete and create?
+ *
+ * There are two criteria used in this algorithm. For the
+ * purposes of helping later rename/copy, we take both delete
+ * and insert into account and estimate the amount of "edit".
+ * If the edit is very large, we break this pair so that
+ * rename/copy can pick the pieces up to match with other
+ * files.
+ *
+ * On the other hand, we would want to ignore inserts for the
+ * pure "complete rewrite" detection. As long as most of the
+ * existing contents were removed from the file, it is a
+ * complete rewrite, and if sizable chunk from the original
+ * still remains in the result, it is not a rewrite. It does
+ * not matter how much or how little new material is added to
+ * the file.
+ *
+ * The score we leave for such a broken filepair uses the
+ * latter definition so that later clean-up stage can find the
+ * pieces that should not have been broken according to the
+ * latter definition after rename/copy runs, and merge the
+ * broken pair that have a score lower than given criteria
+ * back together. The break operation itself happens
+ * according to the former definition.
+ *
+ * The break_score parameter tells us when to break (the
+ * amount of "edit" required for us to consider breaking the
+ * pair). We leave the amount of deletion in *merge_score_p
+ * when we return.
+ *
+ * The value we return is 1 if we want the pair to be broken,
+ * or 0 if we do not.
+ */
+ void *delta;
+ unsigned long delta_size, base_size, src_copied, literal_added;
+ int to_break = 0;
+
+ *merge_score_p = 0; /* assume no deletion --- "do not break"
+ * is the default.
+ */
+
+ if (!S_ISREG(src->mode) || !S_ISREG(dst->mode))
+ return 0; /* leave symlink rename alone */
+
+ if (diff_populate_filespec(src, 0) || diff_populate_filespec(dst, 0))
+ return 0; /* error but caught downstream */
+
+ base_size = ((src->size < dst->size) ? src->size : dst->size); /* the smaller of the two sizes */
+
+ delta = diff_delta(src->data, src->size,
+ dst->data, dst->size,
+ &delta_size, 0); /* NOTE(review): the result is not checked before use -- confirm diff_delta() cannot return NULL here */
+
+ /* Estimate the edit size by interpreting delta. */
+ if (count_delta(delta, delta_size,
+ &src_copied, &literal_added)) {
+ free(delta);
+ return 0; /* we cannot tell */
+ }
+ free(delta);
+
+ /* Compute merge-score, which is "how much is removed
+ * from the source material". The clean-up stage will
+ * merge the surviving pair together if the score is
+ * less than the minimum, after rename/copy runs.
+ */
+ if (src->size <= src_copied)
+ ; /* all copied, nothing removed */
+ else {
+ delta_size = src->size - src_copied; /* delta_size is reused from here on as "bytes removed" */
+ *merge_score_p = delta_size * MAX_SCORE / src->size;
+ }
+
+ /* Extent of damage, which counts both inserts and
+ * deletes.
+ */
+ if (src->size + literal_added <= src_copied)
+ delta_size = 0; /* avoid wrapping around */
+ else
+ delta_size = (src->size - src_copied) + literal_added;
+
+ /* We break if the edit exceeds the minimum.
+ * i.e. (break_score / MAX_SCORE < delta_size / base_size)
+ * The comparison is cross-multiplied so no division is needed.
+ */
+ if (break_score * base_size < delta_size * MAX_SCORE)
+ to_break = 1;
+
+ return to_break;
+}
+
+void diffcore_break(int break_score)
+{ /*
+ * Rebuild diff_queued_diff, replacing every in-place edit that
+ * should_break() judges to be a rewrite with a delete pair followed
+ * by a create pair, both marked broken_pair.  All other pairs are
+ * carried over unchanged.
+ */
+ struct diff_queue_struct *q = &diff_queued_diff;
+ struct diff_queue_struct outq;
+
+ /* When the filepair has this much edit (insert and delete),
+ * it is first considered to be a rewrite and broken into a
+ * create and delete filepair. This is to help breaking a
+ * file that had too much new stuff added, possibly from
+ * moving contents from another file, so that rename/copy can
+ * match it with the other file.
+ *
+ * int break_score; we reuse incoming parameter for this.
+ */
+
+ /* After a pair is broken according to break_score and
+ * subjected to rename/copy, both of them may survive intact,
+ * due to lack of suitable rename/copy peer. Or, the caller
+ * may be calling us without using rename/copy. When that
+ * happens, we merge the broken pieces back into one
+ * modification together if the pair did not have more than
+ * this much delete. For this computation, we do not take
+ * insert into account at all. If you start from a 100-line
+ * file and delete 97 lines of it, it does not matter if you
+ * add 27 lines to it to make a new 30-line file or if you add
+ * 997 lines to it to make a 1000-line file. Either way what
+ * you did was a rewrite of 97%. On the other hand, if you
+ * delete 3 lines, keeping 97 lines intact, it does not matter
+ * if you add 3 lines to it to make a new 100-line file or if
+ * you add 903 lines to it to make a new 1000-line file.
+ * Either way you did a lot of additions and not a rewrite.
+ * This merge happens to catch the latter case. A merge_score
+ * of 80% would be a good default value (a broken pair that
+ * has score lower than merge_score will be merged back
+ * together).
+ */
+ int merge_score;
+ int i;
+
+ /* See comment on DEFAULT_BREAK_SCORE and
+ * DEFAULT_MERGE_SCORE in diffcore.h
+ */
+ merge_score = (break_score >> 16) & 0xFFFF; /* upper half, as packed by diff_scoreopt_parse() */
+ break_score = (break_score & 0xFFFF); /* lower half */
+
+ if (!break_score)
+ break_score = DEFAULT_BREAK_SCORE;
+ if (!merge_score)
+ merge_score = DEFAULT_MERGE_SCORE;
+
+ outq.nr = outq.alloc = 0;
+ outq.queue = NULL;
+
+ for (i = 0; i < q->nr; i++) {
+ struct diff_filepair *p = q->queue[i];
+ int score;
+
+ /* We deal only with in-place edit of non directory.
+ * We do not break anything else.
+ */
+ if (DIFF_FILE_VALID(p->one) && DIFF_FILE_VALID(p->two) &&
+ !S_ISDIR(p->one->mode) && !S_ISDIR(p->two->mode) &&
+ !strcmp(p->one->path, p->two->path)) {
+ if (should_break(p->one, p->two,
+ break_score, &score) &&
+ MINIMUM_BREAK_SIZE <= p->one->size) { /* should_break() runs first, so both sides are populated even for small files */
+ /* Split this into delete and create */
+ struct diff_filespec *null_one, *null_two;
+ struct diff_filepair *dp;
+
+ /* Set score to 0 for the pair that
+ * needs to be merged back together
+ * should they survive rename/copy.
+ * Also we do not want to break very
+ * small files.
+ */
+ if (score < merge_score)
+ score = 0;
+
+ /* deletion of one */
+ null_one = alloc_filespec(p->one->path);
+ dp = diff_queue(&outq, p->one, null_one);
+ dp->score = score;
+ dp->broken_pair = 1;
+
+ /* creation of two */
+ null_two = alloc_filespec(p->two->path);
+ dp = diff_queue(&outq, null_two, p->two);
+ dp->score = score;
+ dp->broken_pair = 1;
+
+ free(p); /* not diff_free_filepair(), we are
+ * reusing one and two here.
+ */
+ continue;
+ }
+ }
+ diff_q(&outq, p);
+ }
+ free(q->queue);
+ *q = outq;
+
+ return;
+}
+
+/*
+ * p and pp are the two halves of a broken pair.  Queue a single pair
+ * made of the source of the delete half and the destination of the
+ * create half, carrying the score of p, then dispose of the halves.
+ */
+static void merge_broken(struct diff_filepair *p,
+			 struct diff_filepair *pp,
+			 struct diff_queue_struct *outq)
+{
+	struct diff_filepair *del_half, *add_half, *merged;
+
+	if (DIFF_FILE_VALID(p->one)) {
+		/* this must be a delete half */
+		del_half = p;
+		add_half = pp;
+	}
+	else {
+		del_half = pp;
+		add_half = p;
+	}
+
+	/* Sanity check */
+	if (!DIFF_FILE_VALID(del_half->one))
+		die("internal error in merge #1");
+	if (DIFF_FILE_VALID(del_half->two))
+		die("internal error in merge #2");
+	if (DIFF_FILE_VALID(add_half->one))
+		die("internal error in merge #3");
+	if (!DIFF_FILE_VALID(add_half->two))
+		die("internal error in merge #4");
+
+	merged = diff_queue(outq, del_half->one, add_half->two);
+	merged->score = p->score;
+
+	/* the placeholder sides are no longer referenced by any pair */
+	diff_free_filespec_data(del_half->two);
+	diff_free_filespec_data(add_half->one);
+	free(del_half);
+	free(add_half);
+}
+
+/*
+ * Rebuild diff_queued_diff, joining the two halves of a broken pair
+ * back into one pair when both still sit at their own path (neither
+ * was turned into a rename/copy).  Everything else is carried over
+ * unchanged.
+ */
+void diffcore_merge_broken(void)
+{
+	struct diff_queue_struct *q = &diff_queued_diff;
+	struct diff_queue_struct outq;
+	int i, j;
+
+	outq.nr = outq.alloc = 0;
+	outq.queue = NULL;
+
+	for (i = 0; i < q->nr; i++) {
+		struct diff_filepair *p = q->queue[i];
+		if (!p)
+			/* we already merged this with its peer */
+			continue;
+		else if (p->broken_pair &&
+			 !strcmp(p->one->path, p->two->path)) {
+			/* If the peer also survived rename/copy, then
+			 * we merge them back together.
+			 */
+			for (j = i + 1; j < q->nr; j++) {
+				struct diff_filepair *pp = q->queue[j];
+				/*
+				 * Slots of peers merged earlier were
+				 * set to NULL; step over them.
+				 */
+				if (pp && pp->broken_pair &&
+				    !strcmp(pp->one->path, pp->two->path) &&
+				    !strcmp(p->one->path, pp->two->path)) {
+					/* Peer survived.  Merge them */
+					merge_broken(p, pp, &outq);
+					q->queue[j] = NULL;
+					break;
+				}
+			}
+			if (q->nr <= j)
+				/* The peer did not survive, so we keep
+				 * it in the output.
+				 */
+				diff_q(&outq, p);
+		}
+		else
+			diff_q(&outq, p);
+	}
+	free(q->queue);
+	*q = outq;
+
+	return;
+}
diff --git a/diffcore-order.c b/diffcore-order.c
new file mode 100644
index 0000000..a03862c
--- /dev/null
+++ b/diffcore-order.c
@@ -0,0 +1,122 @@
+/*
+ * Copyright (C) 2005 Junio C Hamano
+ */
+#include "cache.h"
+#include "diff.h"
+#include "diffcore.h"
+#include <fnmatch.h>
+
+static char **order;
+static int order_cnt;
+
+/*
+ * Load the ordering patterns from orderfile into order[] / order_cnt.
+ *
+ * Returns immediately once order[] has been set up by an earlier call.
+ * A file that cannot be opened, stat'ed or mapped is silently ignored.
+ *
+ * The file is mapped privately and writable so that each line can be
+ * NUL-terminated in place; order[] entries point into the mapping, so
+ * it is deliberately left mapped.  Empty lines and lines starting with
+ * '#' are skipped.
+ */
+static void prepare_order(const char *orderfile)
+{
+	int fd, cnt, pass;
+	void *map;
+	char *cp, *endp;
+	struct stat st;
+
+	if (order)
+		return;
+
+	fd = open(orderfile, O_RDONLY);
+	if (fd < 0)
+		return;
+	if (fstat(fd, &st)) {
+		close(fd);
+		return;
+	}
+	map = mmap(NULL, st.st_size, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0);
+	close(fd);
+	if (map == MAP_FAILED)
+		return;
+	endp = (char *)map + st.st_size;
+
+	/* Pass 0 counts the entries; pass 1 fills the table. */
+	for (pass = 0; pass < 2; pass++) {
+		cnt = 0;
+		cp = map;
+		while (cp < endp) {
+			char *ep;
+			for (ep = cp; ep < endp && *ep != '\n'; ep++)
+				;
+			/* cp to ep has one line */
+			if (*cp == '\n' || *cp == '#')
+				; /* comment */
+			else if (pass == 0)
+				cnt++;
+			else {
+				if (ep < endp) {
+					/* ep stopped on the newline;
+					 * terminate the line in place.
+					 */
+					*ep = 0;
+					order[cnt] = cp;
+				}
+				else {
+					/* Last line has no newline and ep
+					 * is one past the mapping, so it
+					 * must not be read or written:
+					 * take a terminated copy instead.
+					 */
+					order[cnt] = xmalloc(ep-cp+1);
+					memcpy(order[cnt], cp, ep-cp);
+					order[cnt][ep-cp] = 0;
+				}
+				cnt++;
+			}
+			if (ep < endp)
+				ep++;
+			cp = ep;
+		}
+		if (pass == 0) {
+			order_cnt = cnt;
+			order = xmalloc(sizeof(*order) * cnt);
+		}
+	}
+}
+
+/* One queue entry together with the keys diffcore_order() sorts by. */
+struct pair_order {
+ struct diff_filepair *pair;
+ /* index of the pair in the queue before sorting (tie-breaker) */
+ int orig_order;
+ /* match_order() result for the pair's two->path (primary key) */
+ int order;
+};
+
+/*
+ * Return the index of the first order[] pattern that matches path or
+ * one of its leading directories (tried by stripping trailing
+ * components one at a time), or order_cnt when nothing matches.
+ *
+ * A path too long for the PATH_MAX scratch buffer is reported as
+ * unmatched instead of being copied past the end of the buffer.
+ */
+static int match_order(const char *path)
+{
+	int i;
+	size_t len = strlen(path);
+	char p[PATH_MAX];
+
+	if (len >= sizeof(p))
+		return order_cnt;
+
+	for (i = 0; i < order_cnt; i++) {
+		/* fresh copy per pattern: the loop below truncates p */
+		memcpy(p, path, len + 1);
+		while (p[0]) {
+			char *cp;
+			if (!fnmatch(order[i], p, 0))
+				return i;
+			cp = strrchr(p, '/');
+			if (!cp)
+				break;
+			*cp = 0;
+		}
+	}
+	return order_cnt;
+}
+
+/*
+ * qsort() comparator for struct pair_order: ascending by order, with
+ * ties broken by the original queue position.
+ */
+static int compare_pair_order(const void *a_, const void *b_)
+{
+	const struct pair_order *lhs = a_;
+	const struct pair_order *rhs = b_;
+	int delta = lhs->order - rhs->order;
+
+	if (!delta)
+		delta = lhs->orig_order - rhs->orig_order;
+	return delta;
+}
+
+/*
+ * Reorder diff_queued_diff according to the patterns in orderfile.
+ *
+ * Each pair is keyed by match_order() on its two->path; pairs with the
+ * same key keep their original relative order.  An empty queue is left
+ * untouched.
+ */
+void diffcore_order(const char *orderfile)
+{
+	struct diff_queue_struct *q = &diff_queued_diff;
+	struct pair_order *o;
+	int i;
+
+	/* Nothing to sort; also avoids a zero-byte allocation, which
+	 * xmalloc() may treat as failure on some platforms (TODO confirm
+	 * against xmalloc()).
+	 */
+	if (!q->nr)
+		return;
+
+	o = xmalloc(sizeof(*o) * q->nr);
+	prepare_order(orderfile);
+	for (i = 0; i < q->nr; i++) {
+		o[i].pair = q->queue[i];
+		o[i].orig_order = i;
+		o[i].order = match_order(o[i].pair->two->path);
+	}
+	qsort(o, q->nr, sizeof(*o), compare_pair_order);
+	for (i = 0; i < q->nr; i++)
+		q->queue[i] = o[i].pair;
+	free(o);
+	return;
+}
diff --git a/diffcore-pathspec.c b/diffcore-pathspec.c
new file mode 100644
index 0000000..a48acbc
--- /dev/null
+++ b/diffcore-pathspec.c
@@ -0,0 +1,65 @@
+/*
+ * Copyright (C) 2005 Junio C Hamano
+ */
+#include "cache.h"
+#include "diff.h"
+#include "diffcore.h"
+
+/* A pathspec string with its length cached by diffcore_pathspec(). */
+struct path_spec {
+ const char *spec;
+ /* strlen(spec) */
+ int len;
+};
+
+/*
+ * Return 1 if name is selected by any of the cnt pathspecs in s, else 0.
+ *
+ * A spec selects name when it is a leading prefix of name that ends on
+ * a path-component boundary: the spec itself ends in '/', or the match
+ * ends at the end of name, or the next character of name is '/'.
+ * With no pathspecs at all (cnt == 0) everything is selected.
+ */
+static int matches_pathspec(const char *name, struct path_spec *s, int cnt)
+{
+	int i;
+	int namelen;
+
+	if (cnt == 0)
+		return 1;
+
+	namelen = strlen(name);
+	for (i = 0; i < cnt; i++) {
+		int len = s[i].len;
+		if (namelen < len)
+			continue;
+		if (memcmp(s[i].spec, name, len))
+			continue;
+		/* Guard len == 0: spec[len-1] would be out of bounds
+		 * for an empty pathspec.
+		 */
+		if ((len && s[i].spec[len-1] == '/') ||
+		    name[len] == 0 ||
+		    name[len] == '/')
+			return 1;
+	}
+	return 0;
+}
+
+/*
+ * Filter diff_queued_diff down to the pairs whose two->path is selected
+ * by the NULL-terminated pathspec array.  Pairs that do not match are
+ * freed; diff_queued_diff is replaced by the filtered queue.
+ */
+void diffcore_pathspec(const char **pathspec)
+{
+	struct diff_queue_struct *q = &diff_queued_diff;
+	int i, speccnt;
+	struct diff_queue_struct outq;
+	struct path_spec *spec;
+
+	outq.queue = NULL;
+	outq.nr = outq.alloc = 0;
+
+	for (i = 0; pathspec[i]; i++)
+		;
+	speccnt = i;
+	/* Cache the lengths once instead of per queue entry. */
+	spec = xmalloc(sizeof(*spec) * speccnt);
+	for (i = 0; pathspec[i]; i++) {
+		spec[i].spec = pathspec[i];
+		spec[i].len = strlen(pathspec[i]);
+	}
+
+	for (i = 0; i < q->nr; i++) {
+		struct diff_filepair *p = q->queue[i];
+		if (matches_pathspec(p->two->path, spec, speccnt))
+			diff_q(&outq, p);
+		else
+			diff_free_filepair(p);
+	}
+	/* The length table was only needed for the filtering above. */
+	free(spec);
+	free(q->queue);
+	*q = outq;
+	return;
+}
diff --git a/diffcore-pickaxe.c b/diffcore-pickaxe.c
new file mode 100644
index 0000000..50e46ab
--- /dev/null
+++ b/diffcore-pickaxe.c
@@ -0,0 +1,104 @@
+/*
+ * Copyright (C) 2005 Junio C Hamano
+ */
+#include "cache.h"
+#include "diff.h"
+#include "diffcore.h"
+
+/*
+ * Count the non-overlapping occurrences of the len-byte needle in the
+ * contents of one.
+ *
+ * Returns 0 when the contents cannot be loaded and also for an empty
+ * needle: with len == 0 the skip "offset += len - 1" would wrap and
+ * the scan would never advance.
+ */
+static unsigned int contains(struct diff_filespec *one,
+			     const char *needle, unsigned long len)
+{
+	unsigned int cnt;
+	unsigned long offset, sz;
+	const char *data;
+
+	if (!len)
+		return 0;
+	if (diff_populate_filespec(one, 0))
+		return 0;
+
+	sz = one->size;
+	data = one->data;
+	cnt = 0;
+
+	/* Yes, I've heard of strstr(), but the thing is *data may
+	 * not be NUL terminated.  Sue me.
+	 */
+	for (offset = 0; offset + len <= sz; offset++) {
+		/* we count non-overlapping occurrences of needle */
+		if (!memcmp(needle, data + offset, len)) {
+			/* skip the match; the loop increment supplies
+			 * the final +1
+			 */
+			offset += len - 1;
+			cnt++;
+		}
+	}
+	return cnt;
+}
+
+/*
+ * Decide whether one filepair is interesting to the pickaxe:
+ *  - unmerged entries never are;
+ *  - a creation or deletion is when the existing side contains needle;
+ *  - any other pair is when it is modified and the two sides contain
+ *    needle a different number of times.
+ */
+static int pickaxe_pair_touches_needle(struct diff_filepair *p,
+					const char *needle,
+					unsigned long len)
+{
+	if (!DIFF_FILE_VALID(p->one)) {
+		if (!DIFF_FILE_VALID(p->two))
+			return 0; /* ignore unmerged */
+		/* created */
+		return contains(p->two, needle, len) != 0;
+	}
+	if (!DIFF_FILE_VALID(p->two))
+		/* deleted */
+		return contains(p->one, needle, len) != 0;
+	return !diff_unmodified_pair(p) &&
+	       contains(p->one, needle, len) !=
+	       contains(p->two, needle, len);
+}
+
+/*
+ * Filter diff_queued_diff by occurrences of needle.
+ *
+ * With DIFF_PICKAXE_ALL set in opts the queue is all-or-nothing: it is
+ * left untouched as soon as one pair is interesting, and emptied when
+ * none is.  Otherwise only the interesting pairs are kept and the rest
+ * are freed.
+ */
+void diffcore_pickaxe(const char *needle, int opts)
+{
+	struct diff_queue_struct *q = &diff_queued_diff;
+	struct diff_queue_struct outq;
+	unsigned long len = strlen(needle);
+	int i;
+
+	outq.queue = NULL;
+	outq.nr = outq.alloc = 0;
+
+	if (opts & DIFF_PICKAXE_ALL) {
+		/* Showing the whole changeset if needle exists */
+		for (i = 0; i < q->nr; i++)
+			if (pickaxe_pair_touches_needle(q->queue[i],
+							needle, len))
+				return; /* not munge the queue */
+
+		/* Nothing matched: release every entry; the empty
+		 * outq replaces the queue below.
+		 */
+		for (i = 0; i < q->nr; i++)
+			diff_free_filepair(q->queue[i]);
+	}
+	else {
+		/* Showing only the filepairs that have the needle */
+		for (i = 0; i < q->nr; i++) {
+			struct diff_filepair *p = q->queue[i];
+
+			if (pickaxe_pair_touches_needle(p, needle, len))
+				diff_q(&outq, p);
+			else
+				diff_free_filepair(p);
+		}
+	}
+
+	free(q->queue);
+	*q = outq;
+}
diff --git a/diffcore-rename.c b/diffcore-rename.c
new file mode 100644
index 0000000..6a52699
--- /dev/null
+++ b/diffcore-rename.c
@@ -0,0 +1,416 @@
+/*
+ * Copyright (C) 2005 Junio C Hamano
+ */
+#include "cache.h"
+#include "diff.h"
+#include "diffcore.h"
+#include "delta.h"
+#include "count-delta.h"
+
+/* Table of rename/copy destinations, kept sorted by two->path
+ * (locate_rename_dst() binary-searches and inserts in order).
+ */
+
+static struct diff_rename_dst {
+ /* the created file that may turn out to be a rename/copy target */
+ struct diff_filespec *two;
+ /* rename/copy pair recorded for this destination, NULL until matched */
+ struct diff_filepair *pair;
+} *rename_dst;
+static int rename_dst_nr, rename_dst_alloc;
+
+/*
+ * Look up the rename_dst entry whose path equals two->path.
+ *
+ * When there is none, return NULL unless insert_ok is set, in which
+ * case a new entry (pair == NULL) is inserted at its sorted position
+ * and returned.  The returned pointer refers into rename_dst, which a
+ * later insertion may reallocate.
+ */
+static struct diff_rename_dst *locate_rename_dst(struct diff_filespec *two,
+						 int insert_ok)
+{
+	int lo = 0;
+	int hi = rename_dst_nr;
+
+	/* binary search over the sorted table */
+	while (lo < hi) {
+		int mid = (hi + lo) >> 1;
+		struct diff_rename_dst *probe = rename_dst + mid;
+		int cmp = strcmp(two->path, probe->two->path);
+
+		if (cmp == 0)
+			return probe;
+		if (cmp < 0)
+			hi = mid;
+		else
+			lo = mid + 1;
+	}
+
+	/* not found; lo is where the entry belongs */
+	if (!insert_ok)
+		return NULL;
+
+	if (rename_dst_alloc <= rename_dst_nr) {
+		rename_dst_alloc = alloc_nr(rename_dst_alloc);
+		rename_dst = xrealloc(rename_dst,
+				      rename_dst_alloc * sizeof(*rename_dst));
+	}
+	/* shift the tail up by one slot to open a gap at lo */
+	memmove(rename_dst + lo + 1, rename_dst + lo,
+		(rename_dst_nr - lo) * sizeof(*rename_dst));
+	rename_dst_nr++;
+
+	rename_dst[lo].two = two;
+	rename_dst[lo].pair = NULL;
+	return rename_dst + lo;
+}
+
+/* Table of rename/copy src files, kept sorted by one->path
+ * (register_rename_src() binary-searches and inserts in order).
+ */
+static struct diff_rename_src {
+ struct diff_filespec *one;
+ /* copied into the resulting pair's source_stays by record_rename_pair() */
+ unsigned src_stays : 1;
+} *rename_src;
+static int rename_src_nr, rename_src_alloc;
+
+/*
+ * Make sure rename_src has an entry for one->path and return it.
+ *
+ * An existing entry is returned as is (its src_stays is not updated);
+ * otherwise a new entry carrying src_stays is inserted at its sorted
+ * position.  The returned pointer refers into rename_src, which a later
+ * insertion may reallocate.
+ */
+static struct diff_rename_src *register_rename_src(struct diff_filespec *one,
+						   int src_stays)
+{
+	int lo, hi;
+
+	/* binary search over the sorted table */
+	for (lo = 0, hi = rename_src_nr; lo < hi; ) {
+		int mid = (hi + lo) >> 1;
+		struct diff_rename_src *probe = &rename_src[mid];
+		int cmp = strcmp(one->path, probe->one->path);
+
+		if (!cmp)
+			return probe;
+		if (cmp < 0)
+			hi = mid;
+		else
+			lo = mid + 1;
+	}
+
+	/* not present; insert so that the new entry lands at "lo" */
+	if (rename_src_alloc <= rename_src_nr) {
+		rename_src_alloc = alloc_nr(rename_src_alloc);
+		rename_src = xrealloc(rename_src,
+				      rename_src_alloc * sizeof(*rename_src));
+	}
+	memmove(rename_src + lo + 1, rename_src + lo,
+		(rename_src_nr - lo) * sizeof(*rename_src));
+	rename_src_nr++;
+
+	rename_src[lo].one = one;
+	rename_src[lo].src_stays = src_stays;
+	return &rename_src[lo];
+}
+
+/*
+ * Return 1 when src and dst have identical contents, 0 otherwise
+ * (including when either side cannot be populated).
+ *
+ * Cheap tests run first: equal SHA1s when both are valid, then a size
+ * comparison after diff_populate_filespec(..., 1) -- presumably a
+ * size-only populate, confirm against diff.c -- and only then a
+ * byte-by-byte comparison of the fully populated data.
+ */
+static int is_exact_match(struct diff_filespec *src, struct diff_filespec *dst)
+{
+	int both_hashed = src->sha1_valid && dst->sha1_valid;
+
+	if (both_hashed && memcmp(src->sha1, dst->sha1, 20) == 0)
+		return 1;
+
+	if (diff_populate_filespec(src, 1) || diff_populate_filespec(dst, 1) ||
+	    src->size != dst->size)
+		return 0;
+
+	if (diff_populate_filespec(src, 0) || diff_populate_filespec(dst, 0))
+		return 0;
+
+	return src->size == dst->size &&
+	       memcmp(src->data, dst->data, src->size) == 0;
+}
+
+/* One cell of the source x destination similarity matrix built by
+ * diffcore_rename().
+ */
+struct diff_score {
+ int src; /* index in rename_src */
+ int dst; /* index in rename_dst */
+ int score; /* estimate_similarity() result for this src/dst combination */
+};
+
+/*
+ * Estimate how similar src and dst are.
+ *
+ * Returns an integer between 0 and MAX_SCORE; 0 also covers every
+ * "not a candidate" case (non-regular file, empty file, sizes too far
+ * apart for minimum_score, populate or delta failure).
+ */
+static int estimate_similarity(struct diff_filespec *src,
+			       struct diff_filespec *dst,
+			       int minimum_score)
+{
+	/* src points at a file that existed in the original tree (or
+	 * optionally a file in the destination tree) and dst points
+	 * at a newly created file.  They may be quite similar, in which
+	 * case we want to say src is renamed to dst or src is copied into
+	 * dst, and then some edit has been applied to dst.
+	 *
+	 * Compare them and return how similar they are, representing
+	 * the score as an integer between 0 and MAX_SCORE.
+	 *
+	 * When there is an exact match, it is considered a better
+	 * match than anything else; the destination does not even
+	 * call into this function in that case.
+	 */
+	void *delta;
+	unsigned long delta_size, base_size, src_copied, literal_added;
+	unsigned long delta_limit;
+	int score;
+
+	/* We deal only with regular files.  Symlink renames are handled
+	 * only when they are exact matches --- in other words, no edits
+	 * after renaming.
+	 */
+	if (!S_ISREG(src->mode) || !S_ISREG(dst->mode))
+		return 0;
+
+	delta_size = ((src->size < dst->size) ?
+		      (dst->size - src->size) : (src->size - dst->size));
+	base_size = ((src->size < dst->size) ? src->size : dst->size);
+
+	/* An empty file on either side cannot be scored.  The size
+	 * test below rejects base_size == 0 only when the sizes differ,
+	 * so two empty files would otherwise reach the final division
+	 * with a zero divisor.
+	 */
+	if (!base_size)
+		return 0;
+
+	/* We would not consider edits that change the file size so
+	 * drastically.  delta_size must be smaller than
+	 * (MAX_SCORE-minimum_score)/MAX_SCORE * min(src->size, dst->size).
+	 */
+	if (base_size * (MAX_SCORE-minimum_score) < delta_size * MAX_SCORE)
+		return 0;
+
+	if (diff_populate_filespec(src, 0) || diff_populate_filespec(dst, 0))
+		return 0; /* error but caught downstream */
+
+	delta_limit = base_size * (MAX_SCORE-minimum_score) / MAX_SCORE;
+	delta = diff_delta(src->data, src->size,
+			   dst->data, dst->size,
+			   &delta_size, delta_limit);
+	if (!delta)
+		/* If delta_limit is exceeded, we have too much differences */
+		return 0;
+
+	/* A delta that has a lot of literal additions would have
+	 * big delta_size no matter what else it does.
+	 */
+	if (base_size * (MAX_SCORE-minimum_score) < delta_size * MAX_SCORE) {
+		/* we own delta from here on; do not leak it */
+		free(delta);
+		return 0;
+	}
+
+	/* Estimate the edit size by interpreting delta. */
+	if (count_delta(delta, delta_size, &src_copied, &literal_added)) {
+		free(delta);
+		return 0;
+	}
+	free(delta);
+
+	/* Extent of damage */
+	if (src->size + literal_added < src_copied)
+		delta_size = 0;
+	else
+		delta_size = (src->size - src_copied) + literal_added;
+
+	/*
+	 * Now we will give some score to it.  100% edit gets 0 points
+	 * and 0% edit gets MAX_SCORE points.
+	 */
+	score = MAX_SCORE - (MAX_SCORE * delta_size / base_size);
+	if (score < 0) return 0;
+	if (MAX_SCORE < score) return MAX_SCORE;
+	return score;
+}
+
+/*
+ * Record that rename_src[src_index] is the origin of
+ * rename_dst[dst_index] with the given score.
+ *
+ * Fresh filespecs are allocated for both sides (path, sha1 and mode are
+ * copied from the table entries), queued on renq as a new pair, and the
+ * pair is remembered in the destination entry.  Dies if the destination
+ * already has a pair.
+ */
+static void record_rename_pair(struct diff_queue_struct *renq,
+			       int dst_index, int src_index, int score)
+{
+	struct diff_rename_dst *dst_entry = &rename_dst[dst_index];
+	struct diff_rename_src *src_entry = &rename_src[src_index];
+	struct diff_filespec *from, *to, *one, *two;
+	struct diff_filepair *dp;
+
+	if (dst_entry->pair)
+		die("internal error: dst already matched.");
+
+	from = src_entry->one;
+	one = alloc_filespec(from->path);
+	fill_filespec(one, from->sha1, from->mode);
+
+	to = dst_entry->two;
+	two = alloc_filespec(to->path);
+	fill_filespec(two, to->sha1, to->mode);
+
+	dp = diff_queue(renq, one, two);
+	dp->score = score;
+	dp->source_stays = src_entry->src_stays;
+	dst_entry->pair = dp;
+}
+
+/*
+ * qsort() comparator for the rename similarity matrix: orders
+ * struct diff_score entries by score, highest (most similar) first.
+ */
+static int score_compare(const void *a_, const void *b_)
+{
+	const struct diff_score *lhs = a_;
+	const struct diff_score *rhs = b_;
+
+	return rhs->score - lhs->score;
+}
+
+/*
+ * Detect renames (and, when detect_rename is DIFF_DETECT_COPY, copies)
+ * among the pairs in diff_queued_diff and rewrite the queue accordingly.
+ *
+ * minimum_score is the lowest similarity score accepted for an inexact
+ * match; 0 selects DEFAULT_RENAME_SCORE.
+ *
+ * Phases:
+ *  1. collect creations as destinations and deletions (plus, in copy
+ *     mode, the preimage of every other pair) as sources;
+ *  2. pair up exact matches;
+ *  3. score each still-unmatched destination against every source and
+ *     accept pairs from the most similar downwards;
+ *  4. rebuild the queue, substituting rename/copy pairs for the
+ *     creations they replace and dropping the deletions they consumed.
+ *
+ * The file-scope rename_dst / rename_src tables are released and reset
+ * before returning.
+ */
+void diffcore_rename(int detect_rename, int minimum_score)
+{
+ struct diff_queue_struct *q = &diff_queued_diff;
+ struct diff_queue_struct renq, outq;
+ struct diff_score *mx;
+ int i, j;
+ int num_create, num_src, dst_cnt;
+
+ if (!minimum_score)
+ minimum_score = DEFAULT_RENAME_SCORE;
+ renq.queue = NULL;
+ renq.nr = renq.alloc = 0;
+
+ /* NOTE: the inner if/else below pairs up first; the "else if" that
+ * follows it belongs to the outer "if (!DIFF_FILE_VALID(p->one))".
+ */
+ for (i = 0; i < q->nr; i++) {
+ struct diff_filepair *p = q->queue[i];
+ if (!DIFF_FILE_VALID(p->one))
+ if (!DIFF_FILE_VALID(p->two))
+ continue; /* unmerged */
+ else
+ locate_rename_dst(p->two, 1);
+ else if (!DIFF_FILE_VALID(p->two)) {
+ /* If the source is a broken "delete", and
+ * they did not really want to get broken,
+ * that means the source actually stays.
+ */
+ int stays = (p->broken_pair && !p->score);
+ register_rename_src(p->one, stays);
+ }
+ else if (detect_rename == DIFF_DETECT_COPY)
+ register_rename_src(p->one, 1);
+ }
+ if (rename_dst_nr == 0)
+ goto cleanup; /* nothing to do */
+
+ /* We really want to cull the candidates list early
+ * with cheap tests in order to avoid doing deltas.
+ */
+ for (i = 0; i < rename_dst_nr; i++) {
+ struct diff_filespec *two = rename_dst[i].two;
+ for (j = 0; j < rename_src_nr; j++) {
+ struct diff_filespec *one = rename_src[j].one;
+ if (!is_exact_match(one, two))
+ continue;
+ record_rename_pair(&renq, i, j, MAX_SCORE);
+ break; /* we are done with this entry */
+ }
+ }
+ diff_debug_queue("done detecting exact", &renq);
+
+ /* Have we run out the created file pool? If so we can avoid
+ * doing the delta matrix altogether.
+ */
+ if (renq.nr == rename_dst_nr)
+ goto cleanup;
+
+ /* One matrix row of num_src cells per still-unmatched destination. */
+ num_create = (rename_dst_nr - renq.nr);
+ num_src = rename_src_nr;
+ mx = xmalloc(sizeof(*mx) * num_create * num_src);
+ for (dst_cnt = i = 0; i < rename_dst_nr; i++) {
+ int base = dst_cnt * num_src;
+ struct diff_filespec *two = rename_dst[i].two;
+ if (rename_dst[i].pair)
+ continue; /* dealt with exact match already. */
+ for (j = 0; j < rename_src_nr; j++) {
+ struct diff_filespec *one = rename_src[j].one;
+ struct diff_score *m = &mx[base+j];
+ m->src = j;
+ m->dst = i;
+ m->score = estimate_similarity(one, two,
+ minimum_score);
+ }
+ dst_cnt++;
+ }
+ /* cost matrix sorted by most to least similar pair */
+ qsort(mx, num_create * num_src, sizeof(*mx), score_compare);
+ for (i = 0; i < num_create * num_src; i++) {
+ struct diff_rename_dst *dst = &rename_dst[mx[i].dst];
+ if (dst->pair)
+ continue; /* already done, either exact or fuzzy. */
+ if (mx[i].score < minimum_score)
+ break; /* there is no more usable pair. */
+ record_rename_pair(&renq, mx[i].dst, mx[i].src, mx[i].score);
+ }
+ free(mx);
+ diff_debug_queue("done detecting fuzzy", &renq);
+
+ cleanup:
+ /* At this point, we have found some renames and copies and they
+ * are kept in renq. The original list is still in *q.
+ */
+ outq.queue = NULL;
+ outq.nr = outq.alloc = 0;
+ for (i = 0; i < q->nr; i++) {
+ struct diff_filepair *p = q->queue[i];
+ struct diff_filepair *pair_to_free = NULL;
+
+ if (!DIFF_FILE_VALID(p->one) && DIFF_FILE_VALID(p->two)) {
+ /*
+ * Creation
+ *
+ * We would output this create record if it has
+ * not been turned into a rename/copy already.
+ */
+ struct diff_rename_dst *dst =
+ locate_rename_dst(p->two, 0);
+ if (dst && dst->pair) {
+ diff_q(&outq, dst->pair);
+ pair_to_free = p;
+ }
+ else
+ /* no matching rename/copy source, so
+ * record this as a creation.
+ */
+ diff_q(&outq, p);
+ }
+ else if (DIFF_FILE_VALID(p->one) && !DIFF_FILE_VALID(p->two)) {
+ /*
+ * Deletion
+ *
+ * We would output this delete record if:
+ *
+ * (1) this is a broken delete and the counterpart
+ * broken create remains in the output; or
+ * (2) this is not a broken delete, and renq does
+ * not have a rename/copy to move p->one->path
+ * out.
+ *
+ * Otherwise, the counterpart broken create
+ * has been turned into a rename-edit; or
+ * delete did not have a matching create to
+ * begin with.
+ */
+ if (DIFF_PAIR_BROKEN(p)) {
+ /* broken delete */
+ struct diff_rename_dst *dst =
+ locate_rename_dst(p->one, 0);
+ if (dst && dst->pair)
+ /* counterpart is now rename/copy */
+ pair_to_free = p;
+ }
+ else {
+ for (j = 0; j < renq.nr; j++)
+ if (!strcmp(renq.queue[j]->one->path,
+ p->one->path))
+ break;
+ if (j < renq.nr)
+ /* this path remains */
+ pair_to_free = p;
+ }
+
+ if (pair_to_free)
+ ;
+ else
+ diff_q(&outq, p);
+ }
+ else if (!diff_unmodified_pair(p))
+ /* all the usual ones need to be kept */
+ diff_q(&outq, p);
+ else
+ /* no need to keep unmodified pairs */
+ pair_to_free = p;
+
+ if (pair_to_free)
+ diff_free_filepair(pair_to_free);
+ }
+ diff_debug_queue("done copying original", &outq);
+
+ /* Only the pointer arrays are freed here; the pairs themselves were
+ * either moved to outq or freed in the loop above.
+ */
+ free(renq.queue);
+ free(q->queue);
+ *q = outq;
+ diff_debug_queue("done collapsing", q);
+
+ free(rename_dst);
+ rename_dst = NULL;
+ rename_dst_nr = rename_dst_alloc = 0;
+ free(rename_src);
+ rename_src = NULL;
+ rename_src_nr = rename_src_alloc = 0;
+ return;
+}
diff --git a/diffcore.h b/diffcore.h
new file mode 100644
index 0000000..f1b5ca7
--- /dev/null
+++ b/diffcore.h
@@ -0,0 +1,104 @@
+/*
+ * Copyright (C) 2005 Junio C Hamano
+ */
+#ifndef _DIFFCORE_H_
+#define _DIFFCORE_H_
+
+/* This header file is internal between diff.c and its diff transformers
+ * (e.g. diffcore-rename, diffcore-pickaxe). Never include this header
+ * in anything else.
+ */
+
+/* We internally use unsigned short as the score value,
+ * and rely on int being able to hold 32 bits.  -B can take
+ * -Bmerge_score/break_score format and the two scores are
+ * passed around in one int (high 16 bits for merge and low 16 bits
+ * for break).
+ */
+#define MAX_SCORE 60000
+#define DEFAULT_RENAME_SCORE 30000 /* rename/copy similarity minimum (50%) */
+#define DEFAULT_BREAK_SCORE 30000 /* minimum for break to happen (50%)*/
+#define DEFAULT_MERGE_SCORE 48000 /* maximum for break-merge to happen (80%)*/
+
+#define MINIMUM_BREAK_SIZE 400 /* do not break a file smaller than this */
+
+/* One side (preimage or postimage) of a diff_filepair. */
+struct diff_filespec {
+ unsigned char sha1[20];
+ char *path;
+ /* contents and their length in bytes; callers read these after a
+ * successful diff_populate_filespec()
+ */
+ void *data;
+ unsigned long size;
+ int xfrm_flags; /* for use by the xfrm */
+ unsigned short mode; /* file mode */
+ unsigned sha1_valid : 1; /* if true, use sha1 and trust mode;
+ * if false, use the name and read from
+ * the filesystem.
+ */
+/* a filespec with mode 0 stands for "no file on this side" */
+#define DIFF_FILE_VALID(spec) (((spec)->mode) != 0)
+ unsigned should_free : 1; /* data should be free()'ed */
+ unsigned should_munmap : 1; /* data should be munmap()'ed */
+};
+
+extern struct diff_filespec *alloc_filespec(const char *);
+extern void fill_filespec(struct diff_filespec *, const unsigned char *,
+ unsigned short);
+
+extern int diff_populate_filespec(struct diff_filespec *, int);
+extern void diff_free_filespec_data(struct diff_filespec *);
+
+/* A queued change: "one" is the preimage side, "two" the postimage side.
+ * A side whose filespec is not DIFF_FILE_VALID is absent (creation or
+ * deletion); both absent marks an unmerged entry (DIFF_PAIR_UNMERGED).
+ */
+struct diff_filepair {
+ struct diff_filespec *one;
+ struct diff_filespec *two;
+ /* similarity score in the 0..MAX_SCORE range; set by the rename and
+ * break-merge code
+ */
+ unsigned short int score;
+ char status; /* M C R N D U (see Documentation/diff-format.txt) */
+ unsigned source_stays : 1; /* all of R/C are copies */
+ /* presumably set on the halves produced by diffcore_break() -- confirm */
+ unsigned broken_pair : 1;
+};
+/* Neither side is a valid file: an unmerged entry. */
+#define DIFF_PAIR_UNMERGED(p) \
+ (!DIFF_FILE_VALID((p)->one) && !DIFF_FILE_VALID((p)->two))
+
+/* Non-zero when the two sides have different paths. */
+#define DIFF_PAIR_RENAME(p) (strcmp((p)->one->path, (p)->two->path))
+
+/* Exactly one side is valid and the pair is flagged broken_pair. */
+#define DIFF_PAIR_BROKEN(p) \
+ ( (!DIFF_FILE_VALID((p)->one) != !DIFF_FILE_VALID((p)->two)) && \
+ ((p)->broken_pair != 0) )
+
+/* The S_IFMT (file type) bits of the two modes differ. */
+#define DIFF_PAIR_TYPE_CHANGED(p) \
+ ((S_IFMT & (p)->one->mode) != (S_IFMT & (p)->two->mode))
+
+/* The two modes differ in any bit. */
+#define DIFF_PAIR_MODE_CHANGED(p) ((p)->one->mode != (p)->two->mode)
+
+extern void diff_free_filepair(struct diff_filepair *);
+
+extern int diff_unmodified_pair(struct diff_filepair *);
+
+/* A growable array of filepair pointers. */
+struct diff_queue_struct {
+ struct diff_filepair **queue;
+ /* presumably the number of slots allocated in queue -- confirm in diff.c */
+ int alloc;
+ /* number of entries in use; consumers iterate queue[0..nr-1] */
+ int nr;
+};
+
+extern struct diff_queue_struct diff_queued_diff;
+extern struct diff_filepair *diff_queue(struct diff_queue_struct *,
+ struct diff_filespec *,
+ struct diff_filespec *);
+extern void diff_q(struct diff_queue_struct *, struct diff_filepair *);
+
+extern void diffcore_pathspec(const char **pathspec);
+extern void diffcore_break(int);
+extern void diffcore_rename(int rename_copy, int);
+extern void diffcore_merge_broken(void);
+extern void diffcore_pickaxe(const char *needle, int opts);
+extern void diffcore_order(const char *orderfile);
+
+/* Set DIFF_DEBUG to non-zero to get the real debug helpers declared;
+ * with 0 every diff_debug_*() call expands to an empty statement.
+ */
+#define DIFF_DEBUG 0
+#if DIFF_DEBUG
+void diff_debug_filespec(struct diff_filespec *, int, const char *);
+void diff_debug_filepair(const struct diff_filepair *, int);
+void diff_debug_queue(const char *, struct diff_queue_struct *);
+#else
+#define diff_debug_filespec(a,b,c) do {} while(0)
+#define diff_debug_filepair(a,b) do {} while(0)
+#define diff_debug_queue(a,b) do {} while(0)
+#endif
+
+#endif
diff --git a/entry.c b/entry.c
new file mode 100644
index 0000000..15b8fda
--- /dev/null
+++ b/entry.c
@@ -0,0 +1,156 @@
+#include <sys/types.h>
+#include <dirent.h>
+#include "cache.h"
+
+/*
+ * Create every leading directory of path (each prefix that ends just
+ * before a '/' after the first character).
+ *
+ * A prefix that already exists is accepted when it is a directory.
+ * When state->force is set and the prefix lies beyond the base
+ * directory, whatever is in the way is unlinked and replaced by a
+ * directory first.  Any other failure dies.
+ */
+static void create_directories(const char *path, struct checkout *state)
+{
+	char *prefix = xmalloc(strlen(path) + 1);
+	const char *sep;
+
+	for (sep = strchr(path + 1, '/'); sep; sep = strchr(sep + 1, '/')) {
+		int plen = sep - path;
+		struct stat st;
+
+		memcpy(prefix, path, plen);
+		prefix[plen] = 0;
+
+		if (!mkdir(prefix, 0777))
+			continue;
+		if (errno == EEXIST) {
+			if (plen > state->base_dir_len && state->force &&
+			    !unlink(prefix) && !mkdir(prefix, 0777))
+				continue;
+			if (!stat(prefix, &st) && S_ISDIR(st.st_mode))
+				continue; /* ok */
+		}
+		die("cannot create directory at %s", prefix);
+	}
+	free(prefix);
+}
+
+/*
+ * Recursively remove the directory at path and everything below it.
+ * Dies on the first failure, including a path that does not fit in the
+ * PATH_MAX work buffer.
+ */
+static void remove_subtree(const char *path)
+{
+	DIR *dir = opendir(path);
+	struct dirent *de;
+	char pathbuf[PATH_MAX];
+	char *name;
+	size_t pathlen, room;
+
+	if (!dir)
+		die("cannot opendir %s", path);
+	pathlen = strlen(path);
+	/* need room for path, the '/' separator and a terminating NUL */
+	if (pathlen + 1 >= sizeof(pathbuf))
+		die("path too long: %s", path);
+	memcpy(pathbuf, path, pathlen);
+	name = pathbuf + pathlen;
+	*name++ = '/';
+	/* bytes left at "name", terminating NUL included */
+	room = sizeof(pathbuf) - (pathlen + 1);
+	while ((de = readdir(dir)) != NULL) {
+		struct stat st;
+		size_t dlen;
+		/* skip "." and ".." */
+		if ((de->d_name[0] == '.') &&
+		    ((de->d_name[1] == 0) ||
+		     ((de->d_name[1] == '.') && de->d_name[2] == 0)))
+			continue;
+		dlen = strlen(de->d_name);
+		if (dlen >= room)
+			die("path too long: %s/%s", path, de->d_name);
+		memcpy(name, de->d_name, dlen + 1);
+		/* lstat, not stat: a symlink to a directory is unlinked,
+		 * never descended into
+		 */
+		if (lstat(pathbuf, &st))
+			die("cannot lstat %s", pathbuf);
+		if (S_ISDIR(st.st_mode))
+			remove_subtree(pathbuf);
+		else if (unlink(pathbuf))
+			die("cannot unlink %s", pathbuf);
+	}
+	closedir(dir);
+	if (rmdir(path))
+		die("cannot rmdir %s", path);
+}
+
+/*
+ * Exclusively create path for writing and return the descriptor from
+ * open() (negative on failure).  The file is created 0777 when the
+ * owner-execute bit (0100) is set in mode and 0666 otherwise.
+ */
+static int create_file(const char *path, unsigned int mode)
+{
+	unsigned int perm = 0666;
+
+	if (mode & 0100)
+		perm = 0777;
+	return open(path, O_WRONLY | O_TRUNC | O_CREAT | O_EXCL, perm);
+}
+
+/*
+ * Write the blob of cache entry ce to path as a regular file or a
+ * symlink, depending on the entry's mode.
+ *
+ * Returns 0 on success and the result of error() otherwise.  When
+ * state->refresh_cache is set, the entry's stat information is
+ * refreshed from the file just written.
+ */
+static int write_entry(struct cache_entry *ce, const char *path, struct checkout *state)
+{
+	int fd;
+	void *new;
+	unsigned long size;
+	long wrote;
+	char type[20];
+	char *target;
+
+	new = read_sha1_file(ce->sha1, type, &size);
+	if (!new || strcmp(type, "blob")) {
+		free(new);
+		return error("git-checkout-cache: unable to read sha1 file of %s (%s)",
+			path, sha1_to_hex(ce->sha1));
+	}
+	switch (ntohl(ce->ce_mode) & S_IFMT) {
+	case S_IFREG:
+		fd = create_file(path, ntohl(ce->ce_mode));
+		if (fd < 0) {
+			free(new);
+			return error("git-checkout-cache: unable to create file %s (%s)",
+				path, strerror(errno));
+		}
+		wrote = write(fd, new, size);
+		close(fd);
+		free(new);
+		if (wrote < 0 || (unsigned long)wrote != size)
+			return error("git-checkout-cache: unable to write file %s", path);
+		break;
+	case S_IFLNK:
+		/* The blob is the link target but need not be NUL
+		 * terminated; size the copy from the blob so that a
+		 * long target cannot overrun a fixed buffer.
+		 */
+		target = xmalloc(size + 1);
+		memcpy(target, new, size);
+		target[size] = '\0';
+		free(new);
+		if (symlink(target, path)) {
+			int saved_errno = errno; /* free() may clobber it */
+			free(target);
+			return error("git-checkout-cache: unable to create symlink %s (%s)",
+				path, strerror(saved_errno));
+		}
+		free(target);
+		break;
+	default:
+		free(new);
+		return error("git-checkout-cache: unknown file mode for %s", path);
+	}
+
+	if (state->refresh_cache) {
+		struct stat st;
+		/* do not feed an uninitialized st to the cache on failure */
+		if (lstat(ce->name, &st))
+			return error("git-checkout-cache: unable to stat just-written file %s (%s)",
+				ce->name, strerror(errno));
+		fill_stat_cache_info(ce, &st);
+	}
+	return 0;
+}
+
+/*
+ * Check out cache entry ce below state->base_dir.
+ *
+ * An existing path that already matches the entry is left alone.  A
+ * differing one is replaced only when state->force is set; otherwise it
+ * is reported (unless state->quiet) and skipped.  A missing path is
+ * skipped when state->not_new is set.  Returns 0 in all of those cases,
+ * otherwise the result of write_entry().
+ */
+int checkout_entry(struct cache_entry *ce, struct checkout *state)
+{
+	struct stat st;
+	static char path[MAXPATHLEN+1];
+	int len = state->base_dir_len;
+	size_t namelen = strlen(ce->name);
+
+	/* base_dir + name + NUL must fit in the static buffer */
+	if ((size_t)len + namelen >= sizeof(path))
+		return error("git-checkout-cache: path too long for %s", ce->name);
+	memcpy(path, state->base_dir, len);
+	memcpy(path + len, ce->name, namelen + 1);
+
+	if (!lstat(path, &st)) {
+		unsigned changed = ce_match_stat(ce, &st);
+		if (!changed)
+			return 0;
+		if (!state->force) {
+			if (!state->quiet)
+				fprintf(stderr, "git-checkout-cache: %s already exists\n", path);
+			return 0;
+		}
+
+		/*
+		 * We remove the old file, to get the new one with the
+		 * right permissions (including umask, which is nasty
+		 * to emulate by hand - much easier to let the system
+		 * just do the right thing).  A directory in the way is
+		 * removed recursively and never passed to unlink().
+		 */
+		if (S_ISDIR(st.st_mode))
+			remove_subtree(path);
+		else
+			unlink(path);
+	} else if (state->not_new)
+		return 0;
+	create_directories(path, state);
+	return write_entry(ce, path, state);
+}
+
+
diff --git a/epoch.c b/epoch.c
new file mode 100644
index 0000000..db44f5c
--- /dev/null
+++ b/epoch.c
@@ -0,0 +1,639 @@
+/*
+ * Copyright (c) 2005, Jon Seymour
+ *
+ * For more information about epoch theory on which this module is based,
+ * refer to http://blackcubes.dyndns.org/epoch/. That web page defines
+ * terms such as "epoch" and "minimal, non-linear epoch" and provides rationales
+ * for some of the algorithms used here.
+ *
+ */
+#include <stdlib.h>
+
+/* Provides arbitrary precision integers required to accurately represent
+ * fractional mass: */
+#include <openssl/bn.h>
+
+#include "cache.h"
+#include "commit.h"
+#include "epoch.h"
+
+/* A rational number held as a pair of OpenSSL big integers. */
+struct fraction {
+ BIGNUM numerator;
+ BIGNUM denominator;
+};
+
+/* True when n has a non-empty parent list with no second entry. */
+#define HAS_EXACTLY_ONE_PARENT(n) ((n)->parents && !(n)->parents->next)
+
+/* Created on first use by get_BN_CTX(), get_one() and get_zero(). */
+static BN_CTX *context = NULL;
+static struct fraction *one = NULL;
+static struct fraction *zero = NULL;
+
+/* Return the shared BN_CTX, creating it on the first call. */
+static BN_CTX *get_BN_CTX(void)
+{
+	if (context)
+		return context;
+	context = BN_CTX_new();
+	return context;
+}
+
+/* Allocate a fresh fraction initialised to 0/1. */
+static struct fraction *new_zero(void)
+{
+	struct fraction *f = xmalloc(sizeof(struct fraction));
+	BIGNUM *num = &f->numerator;
+	BIGNUM *den = &f->denominator;
+
+	BN_init(num);
+	BN_zero(num);
+	BN_init(den);
+	BN_one(den);
+	return f;
+}
+
+/*
+ * Release the storage held by both components of a fraction.
+ *
+ * BN_free() is used rather than BN_clear(): BN_clear() only zeroes the
+ * value and keeps the digit buffer allocated, which leaked memory for
+ * every fraction that went out of scope. The fraction structure itself
+ * is not freed here and must not be used again without re-initialising.
+ */
+static void clear_fraction(struct fraction *fraction)
+{
+	BN_free(&fraction->numerator);
+	BN_free(&fraction->denominator);
+}
+
+/*
+ * Store fraction / divisor in result and return result.
+ *
+ * The numerator is copied unchanged and the denominator is multiplied
+ * by divisor; the result is not reduced to lowest terms.
+ */
+static struct fraction *divide(struct fraction *result, struct fraction *fraction, int divisor)
+{
+	BIGNUM bn_divisor;
+
+	BN_init(&bn_divisor);
+	BN_set_word(&bn_divisor, divisor);
+
+	BN_copy(&result->numerator, &fraction->numerator);
+	BN_mul(&result->denominator, &fraction->denominator, &bn_divisor, get_BN_CTX());
+
+	/* BN_free (not BN_clear) so the temporary's digit buffer is released. */
+	BN_free(&bn_divisor);
+	return result;
+}
+
+/* Initialise caller-provided storage to 0/1 and return it. */
+static struct fraction *init_fraction(struct fraction *fraction)
+{
+	BIGNUM *num = &fraction->numerator;
+	BIGNUM *den = &fraction->denominator;
+
+	BN_init(num);
+	BN_zero(num);
+	BN_init(den);
+	BN_one(den);
+	return fraction;
+}
+
+/* Return the shared fraction 1/1, building it on first use. */
+static struct fraction *get_one(void)
+{
+	if (one)
+		return one;
+	one = new_zero();
+	BN_one(&one->numerator);
+	return one;
+}
+
+/* Return the shared fraction 0/1, building it on first use. */
+static struct fraction *get_zero(void)
+{
+	if (zero)
+		return zero;
+	zero = new_zero();
+	return zero;
+}
+
+/* Copy both components of "from" into "to" and return "to". */
+static struct fraction *copy(struct fraction *to, struct fraction *from)
+{
+	BIGNUM *src_num = &from->numerator;
+	BIGNUM *src_den = &from->denominator;
+
+	BN_copy(&to->numerator, src_num);
+	BN_copy(&to->denominator, src_den);
+	return to;
+}
+
+/*
+ * Store left + right in result, reduced by the gcd of the new
+ * numerator and denominator, and return result.
+ *
+ * Both cross products are computed into temporaries before result is
+ * written, so result may be the same object as left (callers rely on
+ * this).
+ */
+static struct fraction *add(struct fraction *result, struct fraction *left, struct fraction *right)
+{
+	BIGNUM a, b, gcd;
+
+	BN_init(&a);
+	BN_init(&b);
+	BN_init(&gcd);
+
+	BN_mul(&a, &left->numerator, &right->denominator, get_BN_CTX());
+	BN_mul(&b, &left->denominator, &right->numerator, get_BN_CTX());
+	BN_mul(&result->denominator, &left->denominator, &right->denominator, get_BN_CTX());
+	BN_add(&result->numerator, &a, &b);
+
+	BN_gcd(&gcd, &result->denominator, &result->numerator, get_BN_CTX());
+	BN_div(&result->denominator, NULL, &result->denominator, &gcd, get_BN_CTX());
+	BN_div(&result->numerator, NULL, &result->numerator, &gcd, get_BN_CTX());
+
+	/* BN_free (not BN_clear) so the temporaries' digit buffers are released. */
+	BN_free(&a);
+	BN_free(&b);
+	BN_free(&gcd);
+
+	return result;
+}
+
+/*
+ * Compare two fractions by cross-multiplication.
+ *
+ * Returns the BN_cmp() result for left.num * right.den versus
+ * left.den * right.num; zero means the fractions are equal.
+ */
+static int compare(struct fraction *left, struct fraction *right)
+{
+	BIGNUM a, b;
+	int result;
+
+	BN_init(&a);
+	BN_init(&b);
+
+	BN_mul(&a, &left->numerator, &right->denominator, get_BN_CTX());
+	BN_mul(&b, &left->denominator, &right->numerator, get_BN_CTX());
+
+	result = BN_cmp(&a, &b);
+
+	/* BN_free (not BN_clear) so the temporaries' digit buffers are released. */
+	BN_free(&a);
+	BN_free(&b);
+
+	return result;
+}
+
+/*
+ * Per-commit bookkeeping that new_mass_counter() attaches to
+ * commit->object.util: the mass already accounted for at the commit
+ * ("seen") and the mass waiting to be processed ("pending").
+ */
+struct mass_counter {
+ struct fraction seen;
+ struct fraction pending;
+};
+
+/*
+ * Allocate a mass counter with seen = 0 and pending = *pending, attach
+ * it to commit->object.util and return it. Dies if the commit already
+ * has something stored in object.util.
+ */
+static struct mass_counter *new_mass_counter(struct commit *commit, struct fraction *pending)
+{
+	struct mass_counter *counter = xmalloc(sizeof(struct mass_counter));
+
+	memset(counter, 0, sizeof(struct mass_counter));
+	init_fraction(&counter->seen);
+	init_fraction(&counter->pending);
+	copy(&counter->pending, pending);
+	copy(&counter->seen, get_zero());
+
+	if (commit->object.util)
+		die("multiple attempts to initialize mass counter for %s",
+		    sha1_to_hex(commit->object.sha1));
+
+	commit->object.util = counter;
+	return counter;
+}
+
+/* Clear both fractions of a mass counter and free the counter itself. */
+static void free_mass_counter(struct mass_counter *counter)
+{
+	struct fraction *seen = &counter->seen;
+	struct fraction *pending = &counter->pending;
+
+	clear_fraction(seen);
+	clear_fraction(pending);
+	free(counter);
+}
+
+/*
+ * Finds the base commit of a list of commits.
+ *
+ * One property of the commit being searched for is that every commit reachable
+ * from the base commit is reachable from the commits in the starting list only
+ * via paths that include the base commit.
+ *
+ * This algorithm uses a conservation of mass approach to find the base commit.
+ *
+ * We start by injecting one unit of mass into the graph at each
+ * of the commits in the starting list. Injecting mass into a commit
+ * is achieved by adding to its pending mass counter and, if it is not already
+ * enqueued, enqueuing the commit in a list of pending commits, in latest
+ * commit date first order.
+ *
+ * The algorithm then proceeds to visit each commit in the pending queue.
+ * Upon each visit, the pending mass is added to the mass already seen for that
+ * commit and then divided into N equal portions, where N is the number of
+ * parents of the commit being visited. The divided portions are then injected
+ * into each of the parents.
+ *
+ * The algorithm continues until we discover a commit which has seen all the
+ * mass originally injected or until we run out of things to do.
+ *
+ * If we find a commit that has seen all the original mass, we have found
+ * the common base of all the commits in the starting list.
+ *
+ * The algorithm does _not_ depend on accurate timestamps for correct operation.
+ * However, reasonably sane (e.g. non-random) timestamps are required in order
+ * to prevent an exponential performance characteristic. The occasional
+ * timestamp inaccuracy will not dramatically affect performance but may
+ * result in more nodes being processed than strictly necessary.
+ *
+ * This procedure sets *boundary to the address of the base commit. It returns
+ * non-zero if, and only if, there was a problem parsing one of the
+ * commits discovered during the traversal.
+ */
+static int find_base_for_list(struct commit_list *list, struct commit **boundary)
+{
+ int ret = 0;
+ /* Every commit that received a mass counter, so it can be freed below. */
+ struct commit_list *cleaner = NULL;
+ /* Work queue of commits that still hold pending mass. */
+ struct commit_list *pending = NULL;
+ /* Total mass injected: one unit per distinct starting commit. */
+ struct fraction injected;
+ init_fraction(&injected);
+ *boundary = NULL;
+
+ /* Seed the graph; a commit that already has a counter is skipped. */
+ for (; list; list = list->next) {
+ struct commit *item = list->item;
+
+ if (!item->object.util) {
+ new_mass_counter(list->item, get_one());
+ add(&injected, &injected, get_one());
+
+ commit_list_insert(list->item, &cleaner);
+ commit_list_insert(list->item, &pending);
+ }
+ }
+
+ /* Stop on the first base found, an empty queue, or a parse failure. */
+ while (!*boundary && pending && !ret) {
+ struct commit *latest = pop_commit(&pending);
+ struct mass_counter *latest_node = (struct mass_counter *) latest->object.util;
+ int num_parents;
+
+ /* A failed parse sets ret, which ends the loop via its condition. */
+ if ((ret = parse_commit(latest)))
+ continue;
+ add(&latest_node->seen, &latest_node->seen, &latest_node->pending);
+
+ num_parents = count_parents(latest);
+ if (num_parents) {
+ struct fraction distribution;
+ struct commit_list *parents;
+
+ /* Each parent receives pending / num_parents. */
+ divide(init_fraction(&distribution), &latest_node->pending, num_parents);
+
+ for (parents = latest->parents; parents; parents = parents->next) {
+ struct commit *parent = parents->item;
+ struct mass_counter *parent_node = (struct mass_counter *) parent->object.util;
+
+ if (!parent_node) {
+ /* First visit: create the counter and queue the parent. */
+ parent_node = new_mass_counter(parent, &distribution);
+ insert_by_date(parent, &pending);
+ commit_list_insert(parent, &cleaner);
+ } else {
+ /*
+ * Zero pending mass means the parent is not currently
+ * queued, so queue it again before adding the new share.
+ */
+ if (!compare(&parent_node->pending, get_zero()))
+ insert_by_date(parent, &pending);
+ add(&parent_node->pending, &parent_node->pending, &distribution);
+ }
+ }
+
+ clear_fraction(&distribution);
+ }
+
+ /* A commit that has seen all the injected mass is the base. */
+ if (!compare(&latest_node->seen, &injected))
+ *boundary = latest;
+ copy(&latest_node->pending, get_zero());
+ }
+
+ /* Detach and free every mass counter created during this call. */
+ while (cleaner) {
+ struct commit *next = pop_commit(&cleaner);
+ free_mass_counter((struct mass_counter *) next->object.util);
+ next->object.util = NULL;
+ }
+
+ if (pending)
+ free_commit_list(pending);
+
+ clear_fraction(&injected);
+ return ret;
+}
+
+
+/*
+ * Finds the base of a minimal, non-linear epoch, headed at head, by
+ * applying the find_base_for_list to a list consisting of the parents
+ */
+/*
+ * Build a temporary list from the parents of head, run
+ * find_base_for_list() on it, and return that call's result.
+ */
+static int find_base(struct commit *head, struct commit **boundary)
+{
+	struct commit_list *parent_list = NULL;
+	struct commit_list *p = head->parents;
+	int status;
+
+	while (p) {
+		commit_list_insert(p->item, &parent_list);
+		p = p->next;
+	}
+
+	status = find_base_for_list(parent_list, boundary);
+	free_commit_list(parent_list);
+	return status;
+}
+
+/*
+ * This procedure traverses to the boundary of the first epoch in the epoch
+ * sequence of the epoch headed at head_of_epoch. This is either the end of
+ * the maximal linear epoch or the base of a minimal non-linear epoch.
+ *
+ * The queue of pending nodes is sorted in reverse date order and each node
+ * is currently in the queue at most once.
+ */
+/*
+ * Set *boundary to the end of the first epoch starting at head_of_epoch.
+ * Returns non-zero as soon as a commit fails to parse.
+ */
+static int find_next_epoch_boundary(struct commit *head_of_epoch, struct commit **boundary)
+{
+	struct commit *cursor = head_of_epoch;
+	int status = parse_commit(cursor);
+
+	if (status)
+		return status;
+
+	/*
+	 * Anything other than exactly one parent starts a minimal,
+	 * non-linear epoch: look for the common base of all parents.
+	 */
+	if (!HAS_EXACTLY_ONE_PARENT(cursor))
+		return find_base(cursor, boundary);
+
+	/*
+	 * We are at the start of a maximal linear epoch.
+	 * Follow the single-parent chain to its end.
+	 */
+	do {
+		cursor = cursor->parents->item;
+		status = parse_commit(cursor);
+	} while (!status && HAS_EXACTLY_ONE_PARENT(cursor));
+
+	*boundary = cursor;
+	return status;
+}
+
+/*
+ * Returns non-zero if parent is known to be a parent of child.
+ */
+/* Scan child's parent list for an entry whose sha1 equals parent's. */
+static int is_parent_of(struct commit *parent, struct commit *child)
+{
+	struct commit_list *p = child->parents;
+
+	while (p) {
+		struct commit *candidate = p->item;
+
+		if (memcmp(parent->object.sha1, candidate->object.sha1,
+			   sizeof(candidate->object.sha1)) == 0)
+			return 1;
+		p = p->next;
+	}
+	return 0;
+}
+
+/*
+ * Pushes an item onto the merge order stack. If the top of the stack is
+ * marked as being a possible "break", we check to see whether it actually
+ * is a break.
+ */
+/*
+ * Push item. If the current top is flagged DISCONTINUITY and is a
+ * parent of item, the flag is removed before the push.
+ */
+static void push_onto_merge_order_stack(struct commit_list **stack, struct commit *item)
+{
+	struct commit_list *top = *stack;
+
+	if (top &&
+	    (top->item->object.flags & DISCONTINUITY) &&
+	    is_parent_of(top->item, item))
+		top->item->object.flags &= ~DISCONTINUITY;
+
+	commit_list_insert(item, stack);
+}
+
+/*
+ * Marks all interesting, visited commits reachable from this commit
+ * as uninteresting. We stop recursing when we reach the epoch boundary,
+ * an unvisited node or a node that has already been marked uninteresting.
+ *
+ * This doesn't actually mark all ancestors between the start node and the
+ * epoch boundary uninteresting, but does ensure that they will eventually
+ * be marked uninteresting when the main sort_first_epoch() traversal
+ * eventually reaches them.
+ */
+static void mark_ancestors_uninteresting(struct commit *commit)
+{
+	/* Flags as they were before this call marks the commit. */
+	unsigned int before = commit->object.flags;
+	struct commit_list *p;
+
+	commit->object.flags |= UNINTERESTING;
+
+	/*
+	 * Recursion is only needed for a commit that was visited, is not
+	 * on the boundary, and was not already uninteresting:
+	 *
+	 *  - unvisited parents are marked by the main sort_first_epoch
+	 *    traversal when it reaches them;
+	 *  - ancestors of an already-uninteresting commit have been, or
+	 *    will be, marked the same way.
+	 */
+	if (before & (UNINTERESTING | BOUNDARY))
+		return;
+	if (!(before & VISITED))
+		return;
+
+	for (p = commit->parents; p; p = p->next)
+		mark_ancestors_uninteresting(p->item);
+}
+
+/*
+ * Sorts the nodes of the first epoch of the epoch sequence of the epoch headed at head
+ * into merge order.
+ */
+static void sort_first_epoch(struct commit *head, struct commit_list **stack)
+{
+ struct commit_list *parents;
+
+ head->object.flags |= VISITED;
+
+ /*
+ * TODO: By sorting the parents in a different order, we can alter the
+ * merge order to show contemporaneous changes in parallel branches
+ * occurring after "local" changes. This is useful for a developer
+ * when a developer wants to see all changes that were incorporated
+ * into the same merge as her own changes occur after her own
+ * changes.
+ */
+
+ for (parents = head->parents; parents; parents = parents->next) {
+ struct commit *parent = parents->item;
+
+ if (head->object.flags & UNINTERESTING) {
+ /*
+ * Propagates the uninteresting bit to all parents.
+ * If we have already visited this parent, then
+ * the uninteresting bit will be propagated to each
+ * reachable commit that is still not marked
+ * uninteresting and won't otherwise be reached.
+ */
+ mark_ancestors_uninteresting(parent);
+ }
+
+ if (!(parent->object.flags & VISITED)) {
+ if (parent->object.flags & BOUNDARY) {
+ /* The boundary commit must be the first thing pushed. */
+ if (*stack) {
+ die("something else is on the stack - %s",
+ sha1_to_hex((*stack)->item->object.sha1));
+ }
+ push_onto_merge_order_stack(stack, parent);
+ parent->object.flags |= VISITED;
+
+ } else {
+ /* Recurse; the callee pushes "parent" last, so *stack is non-NULL after. */
+ sort_first_epoch(parent, stack);
+ /*
+ * NOTE(review): "parents" is always non-NULL inside this
+ * loop, so this test never fails - confirm whether
+ * "parents->next" was intended.
+ */
+ if (parents) {
+ /*
+ * This indicates a possible
+ * discontinuity; it may not be an
+ * actual discontinuity if the head
+ * of parent N happens to be the tail
+ * of parent N+1.
+ *
+ * The next push onto the stack will
+ * resolve the question.
+ */
+ (*stack)->item->object.flags |= DISCONTINUITY;
+ }
+ }
+ }
+ }
+
+ push_onto_merge_order_stack(stack, head);
+}
+
+/*
+ * Emit the contents of the stack.
+ *
+ * The stack is freed and replaced by NULL.
+ *
+ * Sets the return value to STOP if no further output should be generated.
+ */
+static int emit_stack(struct commit_list **stack, emitter_func emitter, int include_last)
+{
+	unsigned int flags_seen = 0;
+	int verdict = CONTINUE;
+
+	while (*stack && verdict != STOP) {
+		struct commit *c = pop_commit(stack);
+		int is_last = !*stack;
+
+		flags_seen |= c->object.flags;
+
+		/* The bottom entry is only emitted when include_last is set. */
+		if (is_last && !include_last)
+			continue;
+		if (is_last)
+			c->object.flags |= BOUNDARY;
+		verdict = emitter(c);
+	}
+
+	/* Drop whatever is left if the emitter asked us to stop early. */
+	if (*stack) {
+		free_commit_list(*stack);
+		*stack = NULL;
+	}
+
+	if (verdict == STOP)
+		return STOP;
+	return (flags_seen & UNINTERESTING) ? STOP : CONTINUE;
+}
+
+/*
+ * Sorts an arbitrary epoch into merge order by sorting each epoch
+ * of its epoch sequence into order.
+ *
+ * Note: this algorithm currently leaves traces of its execution in the
+ * object flags of nodes it discovers. This should probably be fixed.
+ */
+static int sort_in_merge_order(struct commit *head_of_epoch, emitter_func emitter)
+{
+ struct commit *next = head_of_epoch;
+ int ret = 0;
+ int action = CONTINUE;
+
+ /* A parse failure here is caught by the !ret tests below. */
+ ret = parse_commit(head_of_epoch);
+
+ next->object.flags |= BOUNDARY;
+
+ /* Process one epoch per iteration until a root, an error or STOP. */
+ while (next && next->parents && !ret && (action != STOP)) {
+ struct commit *base = NULL;
+
+ ret = find_next_epoch_boundary(next, &base);
+ if (ret)
+ return ret;
+ next->object.flags |= BOUNDARY;
+ if (base)
+ base->object.flags |= BOUNDARY;
+
+ if (HAS_EXACTLY_ONE_PARENT(next)) {
+ /* Linear epoch: emit commits one by one along the chain. */
+ while (HAS_EXACTLY_ONE_PARENT(next)
+ && (action != STOP)
+ && !ret) {
+ if (next->object.flags & UNINTERESTING) {
+ action = STOP;
+ } else {
+ action = emitter(next);
+ }
+ if (action != STOP) {
+ next = next->parents->item;
+ ret = parse_commit(next);
+ }
+ }
+
+ } else {
+ /* Non-linear epoch: sort it onto a stack, then emit the stack. */
+ struct commit_list *stack = NULL;
+ sort_first_epoch(next, &stack);
+ action = emit_stack(&stack, emitter, (base == NULL));
+ next = base;
+ }
+ }
+
+ /* Emit the commit the loop ended on; the emitter's result is not used. */
+ if (next && (action != STOP) && !ret) {
+ emitter(next);
+ }
+
+ return ret;
+}
+
+/*
+ * Sorts the nodes reachable from a starting list in merge order, we
+ * first find the base for the starting list and then sort all nodes
+ * in this subgraph using the sort_first_epoch algorithm. Once we have
+ * reached the base we can continue sorting using sort_in_merge_order.
+ */
+/*
+ * Emit, through "emitter", the commits reachable from "list" in merge
+ * order. Returns 0 on success, or the non-zero result of a failed
+ * commit parse. The caller keeps ownership of "list".
+ */
+int sort_list_in_merge_order(struct commit_list *list, emitter_func emitter)
+{
+	struct commit_list *stack = NULL;
+	struct commit *base;
+	int ret = 0;
+	int action = CONTINUE;
+	struct commit_list *reversed = NULL;
+
+	/* Work on a private, reversed copy of the caller's list. */
+	for (; list; list = list->next)
+		commit_list_insert(list->item, &reversed);
+
+	if (!reversed)
+		return ret;
+	else if (!reversed->next) {
+		/*
+		 * If there is only one element in the list, we can sort it
+		 * using sort_in_merge_order.
+		 */
+		base = reversed->item;
+		/* The private copy is no longer needed; do not leak it. */
+		free_commit_list(reversed);
+		reversed = NULL;
+	} else {
+		/*
+		 * Otherwise, we search for the base of the list.
+		 */
+		ret = find_base_for_list(reversed, &base);
+		if (ret) {
+			/* Release the private copy on the error path too. */
+			free_commit_list(reversed);
+			return ret;
+		}
+		if (base)
+			base->object.flags |= BOUNDARY;
+
+		/* pop_commit() consumes the private copy node by node. */
+		while (reversed) {
+			struct commit * next = pop_commit(&reversed);
+
+			if (!(next->object.flags & VISITED) && next!=base) {
+				sort_first_epoch(next, &stack);
+				if (reversed) {
+					/*
+					 * If we have more commits
+					 * to push, then the first
+					 * push for the next parent may
+					 * (or may not) represent a
+					 * discontinuity with respect
+					 * to the parent currently on
+					 * the top of the stack.
+					 *
+					 * Mark it for checking here,
+					 * and check it with the next
+					 * push. See sort_first_epoch()
+					 * for more details.
+					 */
+					stack->item->object.flags |= DISCONTINUITY;
+				}
+			}
+		}
+
+		action = emit_stack(&stack, emitter, (base==NULL));
+	}
+
+	if (base && (action != STOP)) {
+		ret = sort_in_merge_order(base, emitter);
+	}
+
+	return ret;
+}
diff --git a/epoch.h b/epoch.h
new file mode 100644
index 0000000..7493d5a
--- /dev/null
+++ b/epoch.h
@@ -0,0 +1,21 @@
+#ifndef EPOCH_H
+#define EPOCH_H
+
+/*
+ * Forward declarations, so that the declarations below do not depend
+ * on the header defining these structures having been included first.
+ */
+struct commit;
+struct commit_list;
+
+/* Return codes for emitter_func. */
+#define STOP 0
+#define CONTINUE 1
+#define DO 2
+
+/* Callback invoked once per commit as it is emitted. */
+typedef int (*emitter_func) (struct commit *);
+
+int sort_list_in_merge_order(struct commit_list *list, emitter_func emitter);
+
+/* Low bits are used by rev-list */
+#define UNINTERESTING (1u<<10)
+#define BOUNDARY (1u<<11)
+#define VISITED (1u<<12)
+#define DISCONTINUITY (1u<<13)
+#define LAST_EPOCH_FLAG (1u<<14)
+
+
+#endif /* EPOCH_H */
diff --git a/export.c b/export.c
new file mode 100644
index 0000000..ce10b5a
--- /dev/null
+++ b/export.c
@@ -0,0 +1,81 @@
+#include "cache.h"
+#include "commit.h"
+
+/*
+ * Show one commit
+ */
+static void show_commit(struct commit *commit)
+{
+	char hex[100];
+	char cmdline[400];
+
+	/* sha1_to_hex() is called again below, so keep our own copy. */
+	strcpy(hex, sha1_to_hex(commit->object.sha1));
+
+	printf("Id: %s\n", hex);
+	fflush(NULL);	/* flush before the child process writes */
+	sprintf(cmdline, "git-cat-file commit %s", hex);
+	system(cmdline);
+
+	if (commit->parents) {
+		struct commit *first_parent = commit->parents->item;
+		char *against = sha1_to_hex(first_parent->object.sha1);
+
+		printf("\n\n======== diff against %s ========\n", against);
+		fflush(NULL);
+		sprintf(cmdline, "git-diff-tree -p %s %s", against, hex);
+		system(cmdline);
+	}
+
+	printf("======== end ========\n\n");
+}
+
+/*
+ * Show all unseen commits, depth-first
+ */
+static void show_unseen(struct commit *top)
+{
+	struct commit_list *p;
+
+	/* Flag bit 2 marks commits that must not be shown (again). */
+	if ((top->object.flags & 2) != 0)
+		return;
+	top->object.flags |= 2;
+
+	/* Parents first, so a commit is shown after its ancestors. */
+	for (p = top->parents; p; p = p->next)
+		show_unseen(p->item);
+
+	show_commit(top);
+}
+
+/*
+ * Mark what is reachable from top with flag 1 and, when a base is
+ * given, what is reachable from base with flag 2; then show the
+ * commits below top that do not carry flag 2.
+ */
+static void export(struct commit *top, struct commit *base)
+{
+	mark_reachable(&top->object, 1);
+	if (base != NULL)
+		mark_reachable(&base->object, 2);
+
+	show_unseen(top);
+}
+
+/*
+ * Look up the commit named by sha1 and make sure it and all of its
+ * ancestors are parsed. Dies if the object cannot be used as a commit
+ * or if parsing fails.
+ */
+static struct commit *get_commit(unsigned char *sha1)
+{
+	struct commit *commit = lookup_commit(sha1);
+
+	/* Guard against a failed lookup before touching the result. */
+	if (!commit)
+		die("%s is not a commit", sha1_to_hex(sha1));
+
+	if (!commit->object.parsed) {
+		struct commit_list *parents;
+
+		if (parse_commit(commit) < 0)
+			die("unable to parse commit %s", sha1_to_hex(sha1));
+		parents = commit->parents;
+		while (parents) {
+			get_commit(parents->item->object.sha1);
+			parents = parents->next;
+		}
+	}
+	return commit;
+}
+
+/*
+ * git-export top [base]
+ *
+ * Exactly one or two arguments are accepted; anything else prints the
+ * usage message.
+ */
+int main(int argc, char **argv)
+{
+	unsigned char base_sha1[20];
+	unsigned char top_sha1[20];
+
+	/* argc > 3 used to be let through, silently ignoring the extra argument. */
+	if (argc < 2 || argc > 3 ||
+	    get_sha1(argv[1], top_sha1) ||
+	    (argc == 3 && get_sha1(argv[2], base_sha1)))
+		usage("git-export top [base]");
+	export(get_commit(top_sha1), argc==3 ? get_commit(base_sha1) : NULL);
+	return 0;
+}
diff --git a/fetch-pack.c b/fetch-pack.c
new file mode 100644
index 0000000..65e0076
--- /dev/null
+++ b/fetch-pack.c
@@ -0,0 +1,144 @@
+#include "cache.h"
+#include "refs.h"
+#include "pkt-line.h"
+#include <sys/wait.h>
+
+/* When non-zero, "-q" is passed on to git-unpack-objects. */
+static int quiet;
+static const char fetch_pack_usage[] = "git-fetch-pack [-q] [--exec=upload-pack] [host:]directory [heads]* < mycommitlist";
+/* Program handed to git_connect(); overridden by --exec=. */
+static const char *exec = "git-upload-pack";
+
+/*
+ * Send one "want" line for "remote" followed by "have" lines for the
+ * local commits listed by git-rev-list, in batches of 32.
+ *
+ * Returns 0 once get_ack() reports success (result_sha1 is filled in
+ * by get_ack()), or -1 if no acknowledgement was received.
+ */
+static int find_common(int fd[2], unsigned char *result_sha1, unsigned char *remote)
+{
+ static char line[1000];
+ int count = 0, flushes = 0, retval;
+ FILE *revs;
+
+ revs = popen("git-rev-list $(git-rev-parse --all)", "r");
+ if (!revs)
+ die("unable to run 'git-rev-list'");
+ packet_write(fd[1], "want %s\n", sha1_to_hex(remote));
+ packet_flush(fd[1]);
+ /* "flushes" counts flush packets sent but not yet answered. */
+ flushes = 1;
+ retval = -1;
+ while (fgets(line, sizeof(line), revs) != NULL) {
+ unsigned char sha1[20];
+ if (get_sha1_hex(line, sha1))
+ die("git-fetch-pack: expected object name, got crud");
+ packet_write(fd[1], "have %s\n", sha1_to_hex(sha1));
+ /* Flush after every 32nd "have" line. */
+ if (!(31 & ++count)) {
+ packet_flush(fd[1]);
+ flushes++;
+
+ /*
+ * We keep one window "ahead" of the other side, and
+ * will wait for an ACK only on the next one
+ */
+ if (count == 32)
+ continue;
+ if (get_ack(fd[0], result_sha1)) {
+ flushes = 0;
+ retval = 0;
+ break;
+ }
+ flushes--;
+ }
+ }
+ pclose(revs);
+ packet_write(fd[1], "done\n");
+ /* Collect the answers to any flushes still outstanding. */
+ while (flushes) {
+ flushes--;
+ if (get_ack(fd[0], result_sha1))
+ return 0;
+ }
+ return retval;
+}
+
+/*
+ * Eventually we'll want to be able to fetch multiple heads.
+ *
+ * Right now we'll just require a single match.
+ */
+static int fetch_pack(int fd[2], int nr_match, char **match)
+{
+ struct ref *ref;
+ unsigned char sha1[20];
+ int status;
+ pid_t pid;
+
+ get_remote_heads(fd[0], &ref, nr_match, match);
+ /* Exactly one matching remote head is required. */
+ if (!ref) {
+ packet_flush(fd[1]);
+ die("no matching remote head");
+ }
+ if (ref->next) {
+ packet_flush(fd[1]);
+ die("multiple remote heads");
+ }
+ if (find_common(fd, sha1, ref->old_sha1) < 0)
+ die("git-fetch-pack: no common commits");
+ pid = fork();
+ if (pid < 0)
+ die("git-fetch-pack: unable to fork off git-unpack-objects");
+ if (!pid) {
+ /* Child: the connection becomes stdin of git-unpack-objects. */
+ dup2(fd[0], 0);
+ close(fd[0]);
+ close(fd[1]);
+ /* When quiet is unset the first NULL already ends the argument list. */
+ execlp("git-unpack-objects", "git-unpack-objects",
+ quiet ? "-q" : NULL, NULL);
+ die("git-unpack-objects exec failed");
+ }
+ /* Parent: both descriptors are closed here, before waiting. */
+ close(fd[0]);
+ close(fd[1]);
+ /* Retry the wait when it is interrupted by a signal. */
+ while (waitpid(pid, &status, 0) < 0) {
+ if (errno != EINTR)
+ die("waiting for git-unpack-objects: %s", strerror(errno));
+ }
+ if (WIFEXITED(status)) {
+ int code = WEXITSTATUS(status);
+ if (code)
+ die("git-unpack-objects died with error code %d", code);
+ /* Success: print the sha1 of the head that was fetched. */
+ puts(sha1_to_hex(ref->old_sha1));
+ return 0;
+ }
+ if (WIFSIGNALED(status)) {
+ int sig = WTERMSIG(status);
+ die("git-unpack-objects died of signal %d", sig);
+ }
+ die("Sherlock Holmes! git-unpack-objects died of unnatural causes %d!", status);
+}
+
+/*
+ * git-fetch-pack [-q] [--exec=upload-pack] [host:]directory [heads]*
+ *
+ * Options are parsed up to the first non-option argument, which is the
+ * destination; everything after it is the list of heads to match.
+ */
+int main(int argc, char **argv)
+{
+	int i, ret, nr_heads;
+	char *dest = NULL, **heads;
+	int fd[2];
+	pid_t pid;
+
+	nr_heads = 0;
+	heads = NULL;
+	for (i = 1; i < argc; i++) {
+		char *arg = argv[i];
+
+		if (*arg == '-') {
+			if (!strncmp("--exec=", arg, 7)) {
+				exec = arg + 7;
+				continue;
+			}
+			/* "-q" is advertised in the usage text but used to be rejected. */
+			if (!strcmp("-q", arg)) {
+				quiet = 1;
+				continue;
+			}
+			usage(fetch_pack_usage);
+		}
+		dest = arg;
+		heads = argv + i + 1;
+		nr_heads = argc - i - 1;
+		break;
+	}
+	if (!dest)
+		usage(fetch_pack_usage);
+	pid = git_connect(fd, dest, exec);
+	if (pid < 0)
+		return 1;
+	ret = fetch_pack(fd, nr_heads, heads);
+	close(fd[0]);
+	close(fd[1]);
+	finish_connect(pid);
+	return ret;
+}
diff --git a/fsck-cache.c b/fsck-cache.c
new file mode 100644
index 0000000..e40c643
--- /dev/null
+++ b/fsck-cache.c
@@ -0,0 +1,534 @@
+#include <sys/types.h>
+#include <dirent.h>
+
+#include "cache.h"
+#include "commit.h"
+#include "tree.h"
+#include "blob.h"
+#include "tag.h"
+#include "refs.h"
+#include "pack.h"
+
+/* Object flag set through mark_reachable() for objects reachable from a ref. */
+#define REACHABLE 0x0001
+
+/* Command-line switches; each is set by the option of the matching name. */
+static int show_root = 0;
+static int show_tags = 0;
+static int show_unreachable = 0;
+static int standalone = 0;
+static int check_full = 0;
+static int check_strict = 0;
+static int keep_cache_objects = 0;
+static unsigned char head_sha1[20];
+
+/*
+ * Walk every known object and report missing objects, broken links,
+ * unreachable objects (when requested) and dangling objects.
+ */
+static void check_connectivity(void)
+{
+	int i;
+
+	/* Look up all the requirements, warn about missing objects.. */
+	for (i = 0; i < nr_objs; i++) {
+		struct object *obj = objs[i];
+		struct object_list *link;
+
+		if (!obj->parsed) {
+			/* Unless standalone, an object found in a pack is fine. */
+			if (standalone || !has_sha1_file(obj->sha1))
+				printf("missing %s %s\n",
+				       obj->type, sha1_to_hex(obj->sha1));
+			continue;
+		}
+
+		for (link = obj->refs; link; link = link->next) {
+			struct object *target = link->item;
+
+			if (target->parsed)
+				continue;
+			if (!standalone && has_sha1_file(target->sha1))
+				continue;
+			printf("broken link from %7s %s\n",
+			       obj->type, sha1_to_hex(obj->sha1));
+			printf(" to %7s %s\n",
+			       target->type, sha1_to_hex(target->sha1));
+		}
+
+		if (show_unreachable && !(obj->flags & REACHABLE)) {
+			printf("unreachable %s %s\n",
+			       obj->type, sha1_to_hex(obj->sha1));
+			continue;
+		}
+
+		if (obj->used)
+			continue;
+		printf("dangling %s %s\n", obj->type,
+		       sha1_to_hex(obj->sha1));
+	}
+}
+
+/*
+ * The entries in a tree are ordered in the _path_ order,
+ * which means that a directory entry is ordered by adding
+ * a slash to the end of it.
+ *
+ * So a directory called "a" is ordered _after_ a file
+ * called "a.c", because "a/" sorts after "a.c".
+ */
+#define TREE_UNORDERED (-1)
+#define TREE_HAS_DUPS (-2)
+
+/*
+ * Check that entry a sorts strictly before entry b in tree order.
+ * Returns 0 if so, TREE_HAS_DUPS when both names are identical, and
+ * TREE_UNORDERED otherwise.
+ */
+static int verify_ordered(struct tree_entry_list *a, struct tree_entry_list *b)
+{
+	int a_len = strlen(a->name);
+	int b_len = strlen(b->name);
+	int common = (b_len <= a_len) ? b_len : a_len;
+	int order = memcmp(a->name, b->name, common);
+	unsigned char a_next, b_next;
+
+	if (order)
+		return order < 0 ? 0 : TREE_UNORDERED;
+
+	/*
+	 * The common prefix matches, so the byte after it decides.
+	 * A directory's terminating '\0' is compared as if it were '/'.
+	 */
+	a_next = a->name[common];
+	b_next = b->name[common];
+
+	/*
+	 * git-write-tree used to write out a nonsense tree that has
+	 * entries with the same name, one blob and one tree. Make
+	 * sure we do not have duplicate entries.
+	 */
+	if (!a_next && !b_next)
+		return TREE_HAS_DUPS;
+
+	if (!a_next && a->directory)
+		a_next = '/';
+	if (!b_next && b->directory)
+		b_next = '/';
+
+	if (a_next < b_next)
+		return 0;
+	return TREE_UNORDERED;
+}
+
+/*
+ * Check the entries of one tree object.
+ *
+ * Full pathnames, zero-padded modes and bad modes only produce
+ * warnings; duplicate or mis-sorted entries produce an error message
+ * and a return value of -1. Returns 0 otherwise.
+ */
+static int fsck_tree(struct tree *item)
+{
+ int retval;
+ int has_full_path = 0;
+ int has_zero_pad = 0;
+ int has_bad_modes = 0;
+ int has_dup_entries = 0;
+ int not_properly_sorted = 0;
+ struct tree_entry_list *entry, *last;
+
+ last = NULL;
+ for (entry = item->entries; entry; entry = entry->next) {
+ if (strchr(entry->name, '/'))
+ has_full_path = 1;
+ has_zero_pad |= entry->zeropad;
+
+ switch (entry->mode) {
+ /*
+ * Standard modes..
+ */
+ case S_IFREG | 0755:
+ case S_IFREG | 0644:
+ case S_IFLNK:
+ case S_IFDIR:
+ break;
+ /*
+ * This is nonstandard, but we had a few of these
+ * early on when we honored the full set of mode
+ * bits..
+ */
+ case S_IFREG | 0664:
+ if (!check_strict)
+ break;
+ /* fallthrough: with --strict, 0664 counts as a bad mode */
+ default:
+ has_bad_modes = 1;
+ }
+
+ /* Each entry is compared against the one immediately before it. */
+ if (last) {
+ switch (verify_ordered(last, entry)) {
+ case TREE_UNORDERED:
+ not_properly_sorted = 1;
+ break;
+ case TREE_HAS_DUPS:
+ has_dup_entries = 1;
+ break;
+ default:
+ break;
+ }
+ }
+
+ last = entry;
+ }
+
+ retval = 0;
+ if (has_full_path) {
+ fprintf(stderr, "warning: git-fsck-cache: tree %s "
+ "has full pathnames in it\n",
+ sha1_to_hex(item->object.sha1));
+ }
+ if (has_zero_pad) {
+ fprintf(stderr, "warning: git-fsck-cache: tree %s "
+ "has zero-padded file modes in it\n",
+ sha1_to_hex(item->object.sha1));
+ }
+ if (has_bad_modes) {
+ fprintf(stderr, "warning: git-fsck-cache: tree %s "
+ "has bad file modes in it\n",
+ sha1_to_hex(item->object.sha1));
+ }
+ if (has_dup_entries) {
+ fprintf(stderr, "error: git-fsck-cache: tree %s "
+ "has duplicate file entries\n",
+ sha1_to_hex(item->object.sha1));
+ retval = -1;
+ }
+ if (not_properly_sorted) {
+ fprintf(stderr, "error: git-fsck-cache: tree %s "
+ "is not properly sorted\n",
+ sha1_to_hex(item->object.sha1));
+ retval = -1;
+ }
+ return retval;
+}
+
+/*
+ * Check the header layout of a commit buffer: one "tree" line, any
+ * number of "parent" lines, then an "author" line. Returns -1 on a
+ * malformed header or a commit without a tree, 0 otherwise.
+ *
+ * On the success path past the header checks, commit->buffer is freed
+ * and set to NULL.
+ */
+static int fsck_commit(struct commit *commit)
+{
+ char *buffer = commit->buffer;
+ unsigned char sha1[20];
+
+ if (memcmp(buffer, "tree ", 5))
+ return -1;
+ /* "tree " (5) + 40 hex digits puts the newline at offset 45. */
+ if (get_sha1_hex(buffer+5, sha1) || buffer[45] != '\n')
+ return -1;
+ buffer += 46;
+ while (!memcmp(buffer, "parent ", 7)) {
+ /* "parent " (7) + 40 hex digits puts the newline at offset 47. */
+ if (get_sha1_hex(buffer+7, sha1) || buffer[47] != '\n')
+ return -1;
+ buffer += 48;
+ }
+ if (memcmp(buffer, "author ", 7))
+ return -1;
+ free(commit->buffer);
+ commit->buffer = NULL;
+ if (!commit->tree)
+ return -1;
+ if (!commit->parents && show_root)
+ printf("root %s\n", sha1_to_hex(commit->object.sha1));
+ if (!commit->date)
+ printf("bad commit date in %s\n",
+ sha1_to_hex(commit->object.sha1));
+ return 0;
+}
+
+/*
+ * Check that a tag points at an object; with --tags also print what
+ * it tags. Returns -1 for a tag without a tagged object, else 0.
+ */
+static int fsck_tag(struct tag *tag)
+{
+	struct object *target = tag->tagged;
+
+	if (target == NULL) {
+		printf("bad object in tag %s\n", sha1_to_hex(tag->object.sha1));
+		return -1;
+	}
+
+	if (show_tags) {
+		/* Two calls, so each sha1_to_hex() result is printed before the next. */
+		printf("tagged %s %s", target->type, sha1_to_hex(target->sha1));
+		printf(" (%s) in %s\n", tag->tag, sha1_to_hex(tag->object.sha1));
+	}
+	return 0;
+}
+
+/*
+ * Parse the object named by sha1 and run the check for its type.
+ * Returns -1 when the object cannot be parsed or has an unknown type.
+ */
+static int fsck_sha1(unsigned char *sha1)
+{
+	struct object *obj = parse_object(sha1);
+
+	if (obj == NULL)
+		return -1;
+
+	if (obj->type == tree_type)
+		return fsck_tree((struct tree *) obj);
+	if (obj->type == commit_type)
+		return fsck_commit((struct commit *) obj);
+	if (obj->type == tag_type)
+		return fsck_tag((struct tag *) obj);
+
+	/* Blobs need no further checking; anything else is an error. */
+	return (obj->type == blob_type) ? 0 : -1;
+}
+
+/*
+ * This is the sorting chunk size: make it reasonably
+ * big so that we can sort well..
+ */
+#define MAX_SHA1_ENTRIES (1024)
+
+/* One loose object found on disk, remembered with its inode number. */
+struct sha1_entry {
+	unsigned long ino;
+	unsigned char sha1[20];
+};
+
+/* Batch of entries waiting to be sorted and checked. */
+static struct {
+	unsigned long nr;
+	struct sha1_entry *entry[MAX_SHA1_ENTRIES];
+} sha1_list;
+
+/*
+ * qsort() comparator for sha1_list.entry, ordering by inode number.
+ *
+ * The array holds pointers to entries, so qsort() passes pointers to
+ * those pointers; they must be dereferenced once to reach the entry.
+ * Treating the arguments as entries directly compared the pointer
+ * values instead of the inode numbers.
+ */
+static int ino_compare(const void *_a, const void *_b)
+{
+	const struct sha1_entry *a = *(const struct sha1_entry *const *)_a;
+	const struct sha1_entry *b = *(const struct sha1_entry *const *)_b;
+	unsigned long ino1 = a->ino, ino2 = b->ino;
+
+	return ino1 < ino2 ? -1 : ino1 > ino2 ? 1 : 0;
+}
+
+/*
+ * Sort the queued entries with ino_compare, check each one with
+ * fsck_sha1(), free it, and leave the list empty.
+ */
+static void fsck_sha1_list(void)
+{
+	int count = sha1_list.nr;
+	int idx = 0;
+
+	qsort(sha1_list.entry, count, sizeof(struct sha1_entry *), ino_compare);
+
+	while (idx < count) {
+		struct sha1_entry *current = sha1_list.entry[idx];
+
+		sha1_list.entry[idx] = NULL;
+		if (fsck_sha1(current->sha1) < 0)
+			fprintf(stderr, "bad sha1 entry '%s'\n", sha1_to_hex(current->sha1));
+		free(current);
+		idx++;
+	}
+	sha1_list.nr = 0;
+}
+
+/*
+ * Queue one object for checking. When the list is already full it is
+ * processed first and the new entry goes into slot 0.
+ */
+static void add_sha1_list(unsigned char *sha1, unsigned long ino)
+{
+	struct sha1_entry *fresh = xmalloc(sizeof(struct sha1_entry));
+	int slot = sha1_list.nr;
+
+	fresh->ino = ino;
+	memcpy(fresh->sha1, sha1, 20);
+
+	if (slot == MAX_SHA1_ENTRIES) {
+		fsck_sha1_list();
+		slot = 0;
+	}
+	sha1_list.entry[slot] = fresh;
+	sha1_list.nr = slot + 1;
+}
+
+/*
+ * Scan one object fan-out directory. "i" is the directory's number and
+ * supplies the first two hex digits of every object name found in it.
+ * Returns an error if the directory cannot be opened, 0 otherwise.
+ */
+static int fsck_dir(int i, char *path)
+{
+ DIR *dir = opendir(path);
+ struct dirent *de;
+
+ if (!dir) {
+ return error("missing sha1 directory '%s'", path);
+ }
+
+ while ((de = readdir(dir)) != NULL) {
+ char name[100];
+ unsigned char sha1[20];
+ int len = strlen(de->d_name);
+
+ /* "break" reports the name as bad; "continue" accepts it. */
+ switch (len) {
+ case 2:
+ if (de->d_name[1] != '.')
+ break;
+ /* fallthrough: second character is '.', now test the first */
+ case 1:
+ if (de->d_name[0] != '.')
+ break;
+ /* "." or "..": skip silently */
+ continue;
+ case 38:
+ /* Two hex digits from i plus the 38-character file name. */
+ sprintf(name, "%02x", i);
+ memcpy(name+2, de->d_name, len+1);
+ if (get_sha1_hex(name, sha1) < 0)
+ break;
+ add_sha1_list(sha1, de->d_ino);
+ continue;
+ }
+ fprintf(stderr, "bad sha1 file: %s/%s\n", path, de->d_name);
+ }
+ closedir(dir);
+ return 0;
+}
+
+/* Number of refs accepted by fsck_handle_ref(). */
+static int default_refs = 0;
+
+/*
+ * Per-ref callback: mark the object a ref points at as used and
+ * reachable. Always returns 0 so that the remaining refs are still
+ * processed after an error.
+ */
+static int fsck_handle_ref(const char *refname, const unsigned char *sha1)
+{
+	struct object *obj = lookup_object(sha1);
+
+	if (obj) {
+		default_refs++;
+		obj->used = 1;
+		mark_reachable(obj, REACHABLE);
+		return 0;
+	}
+
+	if (standalone || !has_sha1_file(sha1)) {
+		error("%s: invalid sha1 pointer %s", refname, sha1_to_hex(sha1));
+		/* We'll continue with the rest despite the error.. */
+		return 0;
+	}
+
+	/* Not loaded, but it is in a pack. */
+	default_refs++;
+	return 0;
+}
+
+/*
+ * Run fsck_handle_ref() over every ref and die if none of them was
+ * accepted (default_refs is still zero afterwards).
+ */
+static void get_default_heads(void)
+{
+	for_each_ref(fsck_handle_ref);
+	if (default_refs)
+		return;
+	die("No default references");
+}
+
+/*
+ * Scan the 256 subdirectories "00" .. "ff" of "path" with fsck_dir(),
+ * then check whatever is still queued via fsck_sha1_list().
+ *
+ * The subdirectory name is built with a bounded snprintf(); if "path"
+ * is too long to fit, that subdirectory is reported through error() and
+ * skipped rather than overflowing the buffer.
+ */
+static void fsck_object_dir(const char *path)
+{
+	int i;
+	for (i = 0; i < 256; i++) {
+		static char dir[4096];
+		int len = snprintf(dir, sizeof(dir), "%s/%02x", path, i);
+
+		/* snprintf returns the untruncated length; >= size means it did not fit. */
+		if (len < 0 || len >= (int)sizeof(dir)) {
+			error("sha1 directory path too long: %s/%02x", path, i);
+			continue;
+		}
+		fsck_dir(i, dir);
+	}
+	fsck_sha1_list();
+}
+
+/*
+ * Sanity-check "$GIT_DIR/HEAD".
+ *
+ * HEAD must be a symlink whose target starts with "refs/heads/", and
+ * reading through it must yield at least 40 bytes that get_sha1_hex()
+ * accepts.  Each failure is reported through error(), whose value is
+ * returned; 0 is returned when every check passes.
+ */
+static int fsck_head_link(void)
+{
+	int fd, saved_errno;
+	ssize_t count, link_len;
+	char hex[40];
+	unsigned char sha1[20];
+	static char path[PATH_MAX], link[PATH_MAX];
+	const char *git_dir = gitenv(GIT_DIR_ENVIRONMENT) ? : DEFAULT_GIT_DIR_ENVIRONMENT;
+
+	snprintf(path, sizeof(path), "%s/HEAD", git_dir);
+
+	/*
+	 * readlink() does not NUL-terminate its result, so leave room for
+	 * the terminator and add it ourselves before using "link" as a string.
+	 */
+	link_len = readlink(path, link, sizeof(link) - 1);
+	if (link_len < 0)
+		return error("HEAD is not a symlink");
+	link[link_len] = '\0';
+	if (strncmp("refs/heads/", link, 11))
+		return error("HEAD points to something strange (%s)", link);
+
+	fd = open(path, O_RDONLY);
+	if (fd < 0)
+		return error("HEAD: %s", strerror(errno));
+	count = read(fd, hex, sizeof(hex));
+	/* Remember read()'s errno: close() may overwrite it. */
+	saved_errno = errno;
+	close(fd);
+	if (count < 0)
+		return error("HEAD: %s", strerror(saved_errno));
+	if (count < 40 || get_sha1_hex(hex, sha1))
+		return error("HEAD: not a valid git pointer");
+	return 0;
+}
+
+int main(int argc, char **argv)
+{
+ int i, heads;
+
+ for (i = 1; i < argc; i++) {
+ const char *arg = argv[i];
+
+ if (!strcmp(arg, "--unreachable")) {
+ show_unreachable = 1;
+ continue;
+ }
+ if (!strcmp(arg, "--tags")) {
+ show_tags = 1;
+ continue;
+ }
+ if (!strcmp(arg, "--root")) {
+ show_root = 1;
+ continue;
+ }
+ if (!strcmp(arg, "--cache")) {
+ keep_cache_objects = 1;
+ continue;
+ }
+ if (!strcmp(arg, "--standalone")) {
+ standalone = 1;
+ continue;
+ }
+ if (!strcmp(arg, "--full")) {
+ check_full = 1;
+ continue;
+ }
+ if (!strcmp(arg, "--strict")) {
+ check_strict = 1;
+ continue;
+ }
+ if (*arg == '-')
+ usage("git-fsck-cache [--tags] [--root] [[--unreachable] [--cache] [--standalone | --full] [--strict] <head-sha1>*]");
+ }
+
+ if (standalone && check_full)
+ die("Only one of --standalone or --full can be used.");
+ if (standalone)
+ unsetenv("GIT_ALTERNATE_OBJECT_DIRECTORIES");
+
+ fsck_head_link();
+ fsck_object_dir(get_object_directory());
+ if (check_full) {
+ int j;
+ struct packed_git *p;
+ prepare_alt_odb();
+ for (j = 0; alt_odb[j].base; j++) {
+ char namebuf[PATH_MAX];
+ int namelen = al