/*
 * hash.h, as retrieved from git/gitster.git
 * (blob 72ffbc862e557a542c49e9afe9b7fb709d32b0a1).
 * Page header at retrieval time:
 * "builtin/cat-file: mark 'git cat-file' sparse-index compatible".
 */
1 #ifndef HASH_H
2 #define HASH_H
4 #if defined(SHA1_APPLE)
5 #include <CommonCrypto/CommonDigest.h>
6 #elif defined(SHA1_OPENSSL)
7 # include <openssl/sha.h>
8 # if defined(OPENSSL_API_LEVEL) && OPENSSL_API_LEVEL >= 3
9 # define SHA1_NEEDS_CLONE_HELPER
10 # include "sha1/openssl.h"
11 # endif
12 #elif defined(SHA1_DC)
13 #include "sha1dc_git.h"
14 #else /* SHA1_BLK */
15 #include "block-sha1/sha1.h"
16 #endif
18 #if defined(SHA256_NETTLE)
19 #include "sha256/nettle.h"
20 #elif defined(SHA256_GCRYPT)
21 #define SHA256_NEEDS_CLONE_HELPER
22 #include "sha256/gcrypt.h"
23 #elif defined(SHA256_OPENSSL)
24 # include <openssl/sha.h>
25 # if defined(OPENSSL_API_LEVEL) && OPENSSL_API_LEVEL >= 3
26 # define SHA256_NEEDS_CLONE_HELPER
27 # include "sha256/openssl.h"
28 # endif
29 #else
30 #include "sha256/block/sha256.h"
31 #endif
#ifndef platform_SHA_CTX
/*
 * platform's underlying implementation of SHA-1; could be OpenSSL,
 * blk_SHA, Apple CommonCrypto, etc...  Note that the relevant
 * SHA-1 header may have already defined platform_SHA_CTX for our
 * own implementations like block-sha1, so we list
 * the default for OpenSSL compatible SHA-1 implementations here.
 */
#define platform_SHA_CTX	SHA_CTX
#define platform_SHA1_Init	SHA1_Init
#define platform_SHA1_Update	SHA1_Update
#define platform_SHA1_Final	SHA1_Final
#endif

/* git_* names are what the rest of the code uses; they map onto platform_*. */
#define git_SHA_CTX		platform_SHA_CTX
#define git_SHA1_Init		platform_SHA1_Init
#define git_SHA1_Update		platform_SHA1_Update
#define git_SHA1_Final		platform_SHA1_Final

/* Only alias the clone routine when the platform header supplies one. */
#ifdef platform_SHA1_Clone
#define git_SHA1_Clone	platform_SHA1_Clone
#endif
/*
 * Defaults for OpenSSL-style SHA-256 names, used when no earlier header
 * has defined platform_SHA256_CTX.
 */
#ifndef platform_SHA256_CTX
#define platform_SHA256_CTX	SHA256_CTX
#define platform_SHA256_Init	SHA256_Init
#define platform_SHA256_Update	SHA256_Update
#define platform_SHA256_Final	SHA256_Final
#endif

/* git_* names map onto whichever platform_* names are in effect. */
#define git_SHA256_CTX		platform_SHA256_CTX
#define git_SHA256_Init		platform_SHA256_Init
#define git_SHA256_Update	platform_SHA256_Update
#define git_SHA256_Final	platform_SHA256_Final

/* Only alias the clone routine when the platform header supplies one. */
#ifdef platform_SHA256_Clone
#define git_SHA256_Clone	platform_SHA256_Clone
#endif
/*
 * When SHA1_MAX_BLOCK_SIZE is defined, route git_SHA1_Update through the
 * chunked wrapper declared in compat/sha1-chunked.h instead of the
 * platform update function.
 */
#ifdef SHA1_MAX_BLOCK_SIZE
#include "compat/sha1-chunked.h"
#undef git_SHA1_Update
#define git_SHA1_Update		git_SHA1_Update_Chunked
#endif
78 #ifndef SHA1_NEEDS_CLONE_HELPER
79 static inline void git_SHA1_Clone(git_SHA_CTX *dst, const git_SHA_CTX *src)
81 memcpy(dst, src, sizeof(*dst));
83 #endif
85 #ifndef SHA256_NEEDS_CLONE_HELPER
86 static inline void git_SHA256_Clone(git_SHA256_CTX *dst, const git_SHA256_CTX *src)
88 memcpy(dst, src, sizeof(*dst));
90 #endif
/*
 * Note that these constants are suitable for indexing the hash_algos array and
 * comparing against each other, but are otherwise arbitrary, so they should not
 * be exposed to the user or serialized to disk.  To know whether a
 * git_hash_algo struct points to some usable hash function, test the format_id
 * field for being non-zero.  Use the name field for user-visible situations and
 * the format_id field for fixed-length fields on disk.
 */
/* An unknown hash function. */
#define GIT_HASH_UNKNOWN 0
/* SHA-1 */
#define GIT_HASH_SHA1 1
/* SHA-256 */
#define GIT_HASH_SHA256 2
/* Number of algorithms supported (including unknown). */
#define GIT_HASH_NALGOS (GIT_HASH_SHA256 + 1)
/* "sha1", big-endian */
#define GIT_SHA1_FORMAT_ID 0x73686131

/* The length in bytes and in hex digits of an object name (SHA-1 value). */
#define GIT_SHA1_RAWSZ 20
#define GIT_SHA1_HEXSZ (2 * GIT_SHA1_RAWSZ)
/* The block size of SHA-1. */
#define GIT_SHA1_BLKSZ 64

/* "s256", big-endian */
#define GIT_SHA256_FORMAT_ID 0x73323536

/* The length in bytes and in hex digits of an object name (SHA-256 value). */
#define GIT_SHA256_RAWSZ 32
#define GIT_SHA256_HEXSZ (2 * GIT_SHA256_RAWSZ)
/* The block size of SHA-256. */
#define GIT_SHA256_BLKSZ 64

/* The length in bytes and in hex digits of the largest possible hash value. */
#define GIT_MAX_RAWSZ GIT_SHA256_RAWSZ
#define GIT_MAX_HEXSZ GIT_SHA256_HEXSZ
/* The largest possible block size for any supported hash. */
#define GIT_MAX_BLKSZ GIT_SHA256_BLKSZ
133 struct object_id {
134 unsigned char hash[GIT_MAX_RAWSZ];
135 int algo; /* XXX requires 4-byte alignment */
/* Bit flags (octal literals) with the GET_OID_ prefix. */
#define GET_OID_QUIETLY           01
#define GET_OID_COMMIT            02
#define GET_OID_COMMITTISH        04
#define GET_OID_TREE             010
#define GET_OID_TREEISH          020
#define GET_OID_BLOB             040
#define GET_OID_FOLLOW_SYMLINKS 0100
#define GET_OID_RECORD_PATH     0200
#define GET_OID_ONLY_TO_DIE    04000
#define GET_OID_REQUIRE_PATH  010000
#define GET_OID_HASH_ANY      020000

/* Union of the object-type flags above. */
#define GET_OID_DISAMBIGUATORS \
	(GET_OID_COMMIT | GET_OID_COMMITTISH | \
	GET_OID_TREE | GET_OID_TREEISH | \
	GET_OID_BLOB)
/* Result codes with FOUND as zero and every failure negative. */
enum get_oid_result {
	FOUND = 0,
	MISSING_OBJECT = -1, /* The requested object is missing */
	SHORT_NAME_AMBIGUOUS = -2,
	/* The following only apply when symlinks are followed */
	DANGLING_SYMLINK = -4, /*
				* The initial symlink is there, but
				* (transitively) points to a missing
				* in-tree file
				*/
	SYMLINK_LOOP = -5,
	NOT_DIR = -6, /*
		       * Somewhere along the symlink chain, a path is
		       * requested which contains a file as a
		       * non-final element.
		       */
};
/*
 * Translation units that define USE_THE_REPOSITORY_VARIABLE get
 * `the_hash_algo` as shorthand for the_repository->hash_algo.
 */
#ifdef USE_THE_REPOSITORY_VARIABLE
# include "repository.h"
# define the_hash_algo the_repository->hash_algo
#endif
178 /* A suitably aligned type for stack allocations of hash contexts. */
179 union git_hash_ctx {
180 git_SHA_CTX sha1;
181 git_SHA256_CTX sha256;
183 typedef union git_hash_ctx git_hash_ctx;
185 typedef void (*git_hash_init_fn)(git_hash_ctx *ctx);
186 typedef void (*git_hash_clone_fn)(git_hash_ctx *dst, const git_hash_ctx *src);
187 typedef void (*git_hash_update_fn)(git_hash_ctx *ctx, const void *in, size_t len);
188 typedef void (*git_hash_final_fn)(unsigned char *hash, git_hash_ctx *ctx);
189 typedef void (*git_hash_final_oid_fn)(struct object_id *oid, git_hash_ctx *ctx);
191 struct git_hash_algo {
193 * The name of the algorithm, as appears in the config file and in
194 * messages.
196 const char *name;
198 /* A four-byte version identifier, used in pack indices. */
199 uint32_t format_id;
201 /* The length of the hash in binary. */
202 size_t rawsz;
204 /* The length of the hash in hex characters. */
205 size_t hexsz;
207 /* The block size of the hash. */
208 size_t blksz;
210 /* The hash initialization function. */
211 git_hash_init_fn init_fn;
213 /* The hash context cloning function. */
214 git_hash_clone_fn clone_fn;
216 /* The hash update function. */
217 git_hash_update_fn update_fn;
219 /* The hash finalization function. */
220 git_hash_final_fn final_fn;
222 /* The hash finalization function for object IDs. */
223 git_hash_final_oid_fn final_oid_fn;
225 /* The OID of the empty tree. */
226 const struct object_id *empty_tree;
228 /* The OID of the empty blob. */
229 const struct object_id *empty_blob;
231 /* The all-zeros OID. */
232 const struct object_id *null_oid;
234 extern const struct git_hash_algo hash_algos[GIT_HASH_NALGOS];
237 * Return a GIT_HASH_* constant based on the name. Returns GIT_HASH_UNKNOWN if
238 * the name doesn't match a known algorithm.
240 int hash_algo_by_name(const char *name);
241 /* Identical, except based on the format ID. */
242 int hash_algo_by_id(uint32_t format_id);
243 /* Identical, except based on the length. */
244 int hash_algo_by_length(int len);
245 /* Identical, except for a pointer to struct git_hash_algo. */
246 static inline int hash_algo_by_ptr(const struct git_hash_algo *p)
248 return p - hash_algos;
/* Declared here only; presumably yields the all-zeros OID -- confirm at the definition. */
const struct object_id *null_oid(void);
253 static inline int hashcmp(const unsigned char *sha1, const unsigned char *sha2, const struct git_hash_algo *algop)
256 * Teach the compiler that there are only two possibilities of hash size
257 * here, so that it can optimize for this case as much as possible.
259 if (algop->rawsz == GIT_MAX_RAWSZ)
260 return memcmp(sha1, sha2, GIT_MAX_RAWSZ);
261 return memcmp(sha1, sha2, GIT_SHA1_RAWSZ);
264 static inline int hasheq(const unsigned char *sha1, const unsigned char *sha2, const struct git_hash_algo *algop)
267 * We write this here instead of deferring to hashcmp so that the
268 * compiler can properly inline it and avoid calling memcmp.
270 if (algop->rawsz == GIT_MAX_RAWSZ)
271 return !memcmp(sha1, sha2, GIT_MAX_RAWSZ);
272 return !memcmp(sha1, sha2, GIT_SHA1_RAWSZ);
275 static inline void hashcpy(unsigned char *sha_dst, const unsigned char *sha_src,
276 const struct git_hash_algo *algop)
278 memcpy(sha_dst, sha_src, algop->rawsz);
281 static inline void hashclr(unsigned char *hash, const struct git_hash_algo *algop)
283 memset(hash, 0, algop->rawsz);
286 static inline int oidcmp(const struct object_id *oid1, const struct object_id *oid2)
288 return memcmp(oid1->hash, oid2->hash, GIT_MAX_RAWSZ);
291 static inline int oideq(const struct object_id *oid1, const struct object_id *oid2)
293 return !memcmp(oid1->hash, oid2->hash, GIT_MAX_RAWSZ);
296 static inline void oidcpy(struct object_id *dst, const struct object_id *src)
298 memcpy(dst->hash, src->hash, GIT_MAX_RAWSZ);
299 dst->algo = src->algo;
302 static inline void oidread(struct object_id *oid, const unsigned char *hash,
303 const struct git_hash_algo *algop)
305 memcpy(oid->hash, hash, algop->rawsz);
306 if (algop->rawsz < GIT_MAX_RAWSZ)
307 memset(oid->hash + algop->rawsz, 0, GIT_MAX_RAWSZ - algop->rawsz);
308 oid->algo = hash_algo_by_ptr(algop);
311 static inline void oidclr(struct object_id *oid,
312 const struct git_hash_algo *algop)
314 memset(oid->hash, 0, GIT_MAX_RAWSZ);
315 oid->algo = hash_algo_by_ptr(algop);
318 static inline struct object_id *oiddup(const struct object_id *src)
320 struct object_id *dst = xmalloc(sizeof(struct object_id));
321 oidcpy(dst, src);
322 return dst;
325 static inline void oid_set_algo(struct object_id *oid, const struct git_hash_algo *algop)
327 oid->algo = hash_algo_by_ptr(algop);
331 * Converts a cryptographic hash (e.g. SHA-1) into an int-sized hash code
332 * for use in hash tables. Cryptographic hashes are supposed to have
333 * uniform distribution, so in contrast to `memhash()`, this just copies
334 * the first `sizeof(int)` bytes without shuffling any bits. Note that
335 * the results will be different on big-endian and little-endian
336 * platforms, so they should not be stored or transferred over the net.
338 static inline unsigned int oidhash(const struct object_id *oid)
341 * Equivalent to 'return *(unsigned int *)oid->hash;', but safe on
342 * platforms that don't support unaligned reads.
344 unsigned int hash;
345 memcpy(&hash, oid->hash, sizeof(hash));
346 return hash;
349 static inline int is_null_oid(const struct object_id *oid)
351 static const unsigned char null_hash[GIT_MAX_RAWSZ];
352 return !memcmp(oid->hash, null_hash, GIT_MAX_RAWSZ);
/* Declared here only; presumably the hex form of algop's empty-tree OID -- confirm at the definition. */
const char *empty_tree_oid_hex(const struct git_hash_algo *algop);
357 static inline int is_empty_blob_oid(const struct object_id *oid,
358 const struct git_hash_algo *algop)
360 return oideq(oid, algop->empty_blob);
363 static inline int is_empty_tree_oid(const struct object_id *oid,
364 const struct git_hash_algo *algop)
366 return oideq(oid, algop->empty_tree);
369 #endif