Storage API Reference
This document provides a complete API reference for the gitpy.storage and gitpy.repository modules, which implement Git's object storage and repository management.
Module Overview
from gitpy.storage import (
compress, # Zlib compression
decompress, # Zlib decompression
decompress_stream, # Stream decompression
LooseObjectStore, # Low-level loose object storage
ObjectDatabase, # High-level object database
)
from gitpy.repository import Repository # Repository abstraction
Compression Utilities
Module: gitpy.storage.compression
Git uses zlib compression for all stored objects.
compress()
def compress(data: bytes, level: int = DEFAULT_LEVEL) -> bytes
Compress data using zlib.
Parameters:
- data - Raw bytes to compress.
- level - Compression level (0-9, -1 for default). Default uses zlib.Z_DEFAULT_COMPRESSION.
Returns: Compressed bytes.
Example:
from gitpy.storage import compress
compressed = compress(b"hello world")
decompress()
def decompress(data: bytes) -> bytes
Decompress zlib data.
Parameters:
- data - Compressed bytes.
Returns: Decompressed bytes.
Raises:
- zlib.error - Invalid compressed data.
Example:
from gitpy.storage import decompress
original = decompress(compressed_data)
decompress_stream()
def decompress_stream(data: bytes) -> tuple[bytes, bytes]
Decompress data, returning decompressed content and remaining bytes.
Useful for packfiles where multiple compressed streams are concatenated.
Parameters:
- data - Compressed bytes (possibly with trailing data).
Returns: Tuple of (decompressed_content, remaining_bytes).
Example:
from gitpy.storage import decompress_stream
# Decompress first stream from concatenated data
content, remaining = decompress_stream(packed_data)
LooseObjectStore
Module: gitpy.storage.loose
Low-level interface to loose object storage. Loose objects are individual zlib-compressed files stored in .git/objects/<sha[0:2]>/<sha[2:40]>.
class LooseObjectStore:
objects_dir: Path # Path to .git/objects/
Constructor
def __init__(self, git_dir: Path) -> None
Initialize loose object store.
Parameters:
- git_dir - Path to .git directory.
Example:
from pathlib import Path
from gitpy.storage import LooseObjectStore
store = LooseObjectStore(Path("/path/to/repo/.git"))
Methods
exists()
def exists(self, sha: str) -> bool
Check if object exists in store.
Parameters:
- sha - 40-character hex SHA-1.
Returns: True if object exists, False otherwise.
read()
def read(self, sha: str) -> bytes
Read and decompress object.
Parameters:
- sha - 40-character hex SHA-1.
Returns: Decompressed object data (with header).
Raises:
- FileNotFoundError - Object doesn't exist.
- zlib.error - Decompression failed.
- ValueError - SHA mismatch (corrupted object).
Example:
data = store.read("ce013625030ba8dba906f756967f9e9ca394464a")
# Returns: b"blob 6\x00hello\n"
write()
def write(self, sha: str, data: bytes) -> Path
Compress and write object atomically.
Parameters:
- sha - 40-character hex SHA-1.
- data - Complete object data (with header).
Returns: Path to written object.
Note: The write is atomic (temp file + rename). If the object already exists, the existing path is returned and the file is not rewritten.
Example:
from gitpy.objects import Blob, create_object_data
import hashlib
blob = Blob(data=b"hello\n")
data = create_object_data(blob)
sha = hashlib.sha1(data, usedforsecurity=False).hexdigest()
path = store.write(sha, data)
delete()
def delete(self, sha: str) -> bool
Delete object (for garbage collection).
Parameters:
- sha - 40-character hex SHA-1.
Returns: True if the object was deleted, False if it did not exist.
iter_objects()
def iter_objects(self) -> Iterator[str]
Iterate over all object SHAs in store.
Yields: 40-character hex SHA-1 strings for each object.
Example:
for sha in store.iter_objects():
    print(sha)
ObjectDatabase
Module: gitpy.storage.database
High-level interface to Git object storage. Provides type-safe access with automatic serialization, compression, and SHA computation.
class ObjectDatabase:
git_dir: Path # Path to .git directory
loose: LooseObjectStore # Underlying loose object store
Constructor
def __init__(self, git_dir: Path) -> None
Initialize object database.
Parameters:
- git_dir - Path to .git directory.
Example:
from pathlib import Path
from gitpy.storage import ObjectDatabase
db = ObjectDatabase(Path("/path/to/repo/.git"))
Methods
exists()
def exists(self, sha: str) -> bool
Check if object exists.
Parameters:
- sha - Full or abbreviated SHA-1.
Returns: True if object exists, False otherwise.
Example:
if db.exists("ce01362"): # Short SHA supported
    print("Object found!")
read()
def read(self, sha: str) -> GitObject
Read and parse object.
Parameters:
- sha - Object SHA (full or abbreviated).
Returns: Parsed GitObject (Blob, Tree, Commit, or Tag).
Raises:
- FileNotFoundError - Object not found.
- ValueError - Invalid object format.
Example:
obj = db.read("ce013625030ba8dba906f756967f9e9ca394464a")
print(obj.type_name) # "blob"
read_blob()
def read_blob(self, sha: str) -> Blob
Read object as Blob.
Parameters:
- sha - Object SHA (full or abbreviated).
Returns: Blob object.
Raises:
- TypeError - Object is not a blob.
- FileNotFoundError - Object not found.
read_tree()
def read_tree(self, sha: str) -> Tree
Read object as Tree.
Parameters:
- sha - Object SHA (full or abbreviated).
Returns: Tree object.
Raises:
- TypeError - Object is not a tree.
- FileNotFoundError - Object not found.
read_commit()
def read_commit(self, sha: str) -> Commit
Read object as Commit.
Parameters:
- sha - Object SHA (full or abbreviated).
Returns: Commit object.
Raises:
- TypeError - Object is not a commit.
- FileNotFoundError - Object not found.
read_tag()
def read_tag(self, sha: str) -> Tag
Read object as Tag.
Parameters:
- sha - Object SHA (full or abbreviated).
Returns: Tag object.
Raises:
- TypeError - Object is not a tag.
- FileNotFoundError - Object not found.
read_raw()
def read_raw(self, sha: str) -> bytes
Read raw object data (decompressed, with header).
Parameters:
- sha - Object SHA (full or abbreviated).
Returns: Raw object bytes.
Raises:
- FileNotFoundError - Object not found.
write()
def write(self, obj: GitObject) -> str
Write object to storage.
Parameters:
- obj - GitObject to write.
Returns: SHA-1 of written object.
Example:
from gitpy.objects import Blob
blob = Blob(data=b"hello world")
sha = db.write(blob)
print(sha) # "95d09f2b10159347eece71399a7e2e907ea3df4f"
hash_object()
def hash_object(self, obj: GitObject, *, write: bool = True) -> str
Compute SHA of object, optionally storing it.
Parameters:
- obj - GitObject to hash.
- write - If True (the default), also write the object to storage; pass write=False to only compute the hash.
Returns: SHA-1 hash.
Example:
# Just compute hash without storing
sha = db.hash_object(blob, write=False)
get_type()
def get_type(self, sha: str) -> str
Get object type without full parse.
Parameters:
- sha - Object SHA (full or abbreviated).
Returns: Object type name ("blob", "tree", "commit", or "tag").
get_size()
def get_size(self, sha: str) -> int
Get object size without full parse.
Parameters:
- sha - Object SHA (full or abbreviated).
Returns: Size in bytes of the object content.
Repository
Module: gitpy.repository
Represents a Git repository. Provides access to all repository components: objects, references, index, etc.
class Repository:
worktree: Path # Working directory path
git_dir: Path # .git directory path
objects: ObjectDatabase # Object database
Constructor
def __init__(self, path: Path, git_dir: Path | None = None) -> None
Open existing repository.
Parameters:
- path - Working directory path.
- git_dir - .git directory (default: path/.git).
Raises:
- ValueError - Not a git repository.
Example:
from pathlib import Path
from gitpy.repository import Repository
repo = Repository(Path("/path/to/repo"))
Class Methods
init()
@classmethod
def init(cls, path: Path, *, bare: bool = False) -> Self
Initialize a new repository.
Parameters:
- path - Where to create repository.
- bare - If True, create bare repository (no working directory).
Returns: Newly created Repository.
Raises:
- ValueError - Already a git repository.
Example:
# Create normal repository
repo = Repository.init(Path("/path/to/new/repo"))
# Create bare repository
bare_repo = Repository.init(Path("/path/to/repo.git"), bare=True)
Created Structure:
.git/
├── HEAD # ref: refs/heads/main
├── config # Repository config
├── description # Repo description
├── objects/
│   ├── info/
│   └── pack/
├── refs/
│   ├── heads/
│   └── tags/
└── info/
    └── exclude # Local gitignore
find()
@classmethod
def find(cls, start_path: Path | None = None) -> Self
Find repository containing path.
Searches up directory tree for .git directory.
Parameters:
- start_path - Where to start search (default: cwd).
Returns: Repository containing start_path.
Raises:
- ValueError - Not inside a repository.
Example:
# Find repo from current directory
repo = Repository.find()
# Find repo from specific path
repo = Repository.find(Path("/path/to/repo/src/deep/nested"))
Complete Example
from pathlib import Path
from gitpy.repository import Repository
from gitpy.objects import Blob, Tree, TreeEntry, Commit, Identity
# Initialize a new repository
repo = Repository.init(Path("/tmp/my-repo"))
# Create and store a blob
readme_blob = Blob(data=b"# My Project\n\nWelcome to my project!\n")
readme_sha = repo.objects.write(readme_blob)
print(f"README blob: {readme_sha}")
# Create and store a tree
tree = Tree(entries=[
TreeEntry(mode=0o100644, name="README.md", sha=readme_sha),
])
tree_sha = repo.objects.write(tree)
print(f"Root tree: {tree_sha}")
# Create and store a commit
author = Identity(
name="Alice",
email="alice@example.com",
timestamp=1704067200,
tz_offset=0,
)
commit = Commit(
tree_sha=tree_sha,
parent_shas=[],
author=author,
committer=author,
message="Initial commit\n",
)
commit_sha = repo.objects.write(commit)
print(f"Commit: {commit_sha}")
# Read back the commit
restored_commit = repo.objects.read_commit(commit_sha)
print(f"Message: {restored_commit.message}")
print(f"Author: {restored_commit.author.name}")
# Use short SHA
obj = repo.objects.read(commit_sha[:7])
print(f"Type: {obj.type_name}")
Git Compatibility
The storage module is fully compatible with Git:
| Test | Result |
|---|---|
| gitpy reads Git-written objects | ✅ |
| Git reads gitpy-written objects | ✅ |
| Empty blob hash matches | ✅ e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 |
| Empty tree hash matches | ✅ 4b825dc642cb6eb9a060e54bf8d69288fbee4904 |
| "hello\n" blob hash matches | ✅ ce013625030ba8dba906f756967f9e9ca394464a |
Verification:
# Create object with Git, read with gitpy
echo "hello" | git hash-object -w --stdin
# gitpy can read this object
# Create object with gitpy, read with Git
# After gitpy writes an object:
git cat-file -p <sha> # Works!
Type Hints
The module uses Python 3.12+ type hints:
from typing import Self
from pathlib import Path
from collections.abc import Iterator
See Also
- Pack Objects API: Pack files, delta compression, and pack indexes