Skip to content

Object Model API Reference

This document provides a complete API reference for the gitpy.objects module, which implements Git's four core object types.

Module Overview

from gitpy.objects import (
    GitObject,      # Abstract base class
    Blob,           # File contents
    Tree,           # Directory structure
    TreeEntry,      # Single tree entry
    Commit,         # Repository snapshot
    Identity,       # Author/committer info
    Tag,            # Annotated tag
    parse_object,   # Parse object with header
    create_object_data,  # Create object with header
    OBJECT_TYPES,   # Type registry
)

GitObject

Module: gitpy.objects.base

Abstract base class for all Git objects. Git objects are immutable, content-addressable entities identified by a SHA-1 hash computed over their header and contents (see compute_hash()).

class GitObject(ABC):
    type_name: str  # "blob", "tree", "commit", or "tag"

Properties

oid

@property
def oid(self) -> str

Object ID (SHA-1 hash). Returns a 40-character hexadecimal SHA-1 hash identifying this object.

Example:

blob = Blob(data=b"hello\n")
print(blob.oid)  # "ce013625030ba8dba906f756967f9e9ca394464a"

Methods

serialize()

@abstractmethod
def serialize(self) -> bytes

Serialize object content (without header).

Returns: The raw bytes representing this object's content.

deserialize()

@classmethod
@abstractmethod
def deserialize(cls, data: bytes) -> Self

Deserialize object content (without header).

Parameters: - data - Raw bytes of the object content.

Returns: A new instance of the object.

compute_hash()

def compute_hash(self) -> str

Compute SHA-1 hash of this object. The hash is computed over the full object data including the header: <type> <size>\0<content>.

Returns: 40-character hexadecimal SHA-1 hash.

Comparison and Hashing

def __eq__(self, other: object) -> bool
def __hash__(self) -> int

Two objects are equal if they have the same OID. Objects are hashable and can be used in sets and dicts.


Blob

Module: gitpy.objects.blob

Represents file contents in Git. A blob stores the raw contents of a single file with no filename, permissions, or other metadata.

@dataclass(slots=True)
class Blob(GitObject):
    data: bytes = b""
    type_name: str = "blob"

Attributes

Name Type Default Description
data bytes b"" Raw file contents
type_name str "blob" Object type identifier

Constructor

def __init__(self, data: bytes = b"") -> None

Parameters: - data - Raw file contents as bytes.

Example:

# Create from bytes
blob = Blob(data=b"Hello, World!\n")

# Empty blob
empty_blob = Blob()

Methods

serialize()

def serialize(self) -> bytes

Return raw content.

Returns: The raw file contents.

deserialize()

@classmethod
def deserialize(cls, data: bytes) -> Self

Create Blob from raw content.

Parameters: - data - Raw bytes of the file contents.

Returns: A new Blob instance.

from_file()

@classmethod
def from_file(cls, path: str | Path) -> Self

Create Blob from file path.

Parameters: - path - Path to the file to read.

Returns: A new Blob instance with the file's contents.

Example:

blob = Blob.from_file("README.md")
print(blob.oid)

Well-Known Hashes

Content OID
Empty (b"") e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
b"hello\n" ce013625030ba8dba906f756967f9e9ca394464a

TreeEntry

Module: gitpy.objects.tree

A single entry in a tree object, representing a file or subdirectory.

@dataclass(slots=True)
class TreeEntry:
    mode: str
    name: str
    sha: str

Attributes

Name Type Description
mode str File mode ("100644", "100755", "40000", "120000", or "160000"; see File Modes Reference)
name str Filename without path separators
sha str 40-character hexadecimal SHA-1 hash

Constructor

def __init__(self, mode: str, name: str, sha: str) -> None

Parameters: - mode - File mode string. - name - Filename (must not contain /). - sha - 40-character SHA-1 hash.

Example:

entry = TreeEntry(
    mode="100644",
    name="README.md",
    sha="ce013625030ba8dba906f756967f9e9ca394464a"
)

Properties

is_tree

@property
def is_tree(self) -> bool

Returns True if this entry points to a tree (directory).

is_blob

@property
def is_blob(self) -> bool

Returns True if this entry points to a blob (regular file).

is_symlink

@property
def is_symlink(self) -> bool

Returns True if this entry is a symbolic link.

is_executable

@property
def is_executable(self) -> bool

Returns True if this entry is an executable file.

Methods

sort_key()

def sort_key(self) -> str

Generate sort key for tree entry ordering. Git sorts tree entries by name, but directories are sorted as if they had a trailing /.

Returns: Sort key string.

File Modes Reference

Mode Description
100644 Regular file (non-executable)
100755 Executable file
40000 Directory (tree)
120000 Symbolic link
160000 Gitlink (submodule)

Tree

Module: gitpy.objects.tree

Represents a directory listing in Git. A tree contains entries mapping names to blobs (files) or other trees (subdirectories).

@dataclass(slots=True)
class Tree(GitObject):
    entries: list[TreeEntry] = field(default_factory=list)
    type_name: str = "tree"

Attributes

Name Type Default Description
entries list[TreeEntry] [] List of TreeEntry objects
type_name str "tree" Object type identifier

Constructor

def __init__(self, entries: list[TreeEntry] | None = None) -> None

Parameters: - entries - List of TreeEntry objects (optional).

Example:

tree = Tree(entries=[
    TreeEntry("100644", "README.md", "ce01362..."),
    TreeEntry("40000", "src", "7a8b9c0..."),
])

Methods

serialize()

def serialize(self) -> bytes

Serialize tree to bytes.

Format: Sequence of entries, each as <mode> <name>\0<20-byte-sha>. Entries are sorted by Git's sorting rules.

Returns: Binary representation of the tree.

deserialize()

@classmethod
def deserialize(cls, data: bytes) -> Self

Parse tree from bytes.

Parameters: - data - Binary tree data (without header).

Returns: A new Tree instance.

add_entry()

def add_entry(self, mode: str, name: str, sha: str) -> None

Add an entry to this tree.

Parameters: - mode - File mode string. - name - Filename (must not contain /). - sha - 40-character SHA-1 hash.

Raises: - ValueError - If name contains /.

Example:

tree = Tree()
tree.add_entry("100644", "file.txt", "abc123...")
tree.add_entry("40000", "subdir", "def456...")

get_entry()

def get_entry(self, name: str) -> TreeEntry | None

Get entry by name.

Parameters: - name - Filename to look up.

Returns: The TreeEntry if found, None otherwise.

Example:

entry = tree.get_entry("README.md")
if entry:
    print(entry.sha)

Well-Known Hashes

Content OID
Empty tree 4b825dc642cb6eb9a060e54bf8d69288fbee4904

Identity

Module: gitpy.objects.commit

Author or committer identity. Represents a person's identity in Git commits and tags.

@dataclass(slots=True)
class Identity:
    name: str
    email: str
    timestamp: int
    tz_offset: str

Attributes

Name Type Description
name str Person's name (can contain spaces)
email str Email address
timestamp int Unix timestamp (seconds since epoch)
tz_offset str Timezone offset as "+HHMM" or "-HHMM"

Constructor

def __init__(self, name: str, email: str, timestamp: int, tz_offset: str) -> None

Example:

author = Identity(
    name="Alice Smith",
    email="alice@example.com",
    timestamp=1699900000,
    tz_offset="-0700"
)

Methods

__str__()

def __str__(self) -> str

Format as Git identity string.

Returns: String in format "Name <email> timestamp tz_offset".

Example:

print(str(author))  # "Alice Smith <alice@example.com> 1699900000 -0700"

parse()

@classmethod
def parse(cls, line: str) -> Self

Parse a Git identity string in "Name <email> timestamp tz_offset" format.

Parameters: - line - Identity string from Git object.

Returns: A new Identity instance.

Example:

identity = Identity.parse("Alice <alice@example.com> 1699900000 -0700")

now()

@classmethod
def now(cls, name: str, email: str, tz_offset: str = "+0000") -> Self

Create identity with current timestamp.

Parameters: - name - Person's name. - email - Email address. - tz_offset - Timezone offset (default: "+0000" for UTC).

Returns: A new Identity instance with current time.

Example:

me = Identity.now("Bob", "bob@example.com", "-0800")

Commit

Module: gitpy.objects.commit

Represents a commit object in Git. A commit is a snapshot of the repository at a point in time.

@dataclass(slots=True)
class Commit(GitObject):
    tree_sha: str = ""
    parent_shas: list[str] = field(default_factory=list)
    author: Identity | None = None
    committer: Identity | None = None
    message: str = ""
    type_name: str = "commit"

Attributes

Name Type Default Description
tree_sha str "" SHA-1 hash of the root tree object
parent_shas list[str] [] List of parent commit SHAs
author Identity | None None Identity of who wrote the change
committer Identity | None None Identity of who committed the change
message str "" Commit message (can be multi-line)
type_name str "commit" Object type identifier

Constructor

def __init__(
    self,
    tree_sha: str = "",
    parent_shas: list[str] | None = None,
    author: Identity | None = None,
    committer: Identity | None = None,
    message: str = "",
) -> None

Example:

author = Identity.now("Alice", "alice@example.com")
commit = Commit(
    tree_sha="4b825dc642cb6eb9a060e54bf8d69288fbee4904",
    parent_shas=["abc123..."],
    author=author,
    committer=author,
    message="Initial commit\n\nAdd project structure."
)

Properties

is_root

@property
def is_root(self) -> bool

Returns True if this is a root commit (no parents).

is_merge

@property
def is_merge(self) -> bool

Returns True if this is a merge commit (multiple parents).

Methods

serialize()

def serialize(self) -> bytes

Serialize commit to bytes.

Returns: Binary representation of the commit.

deserialize()

@classmethod
def deserialize(cls, data: bytes) -> Self

Parse commit from bytes.

Parameters: - data - Binary commit data (without header).

Returns: A new Commit instance.


Tag

Module: gitpy.objects.tag

Represents an annotated tag object in Git. An annotated tag points to another object (usually a commit) with additional metadata.

@dataclass(slots=True)
class Tag(GitObject):
    object_sha: str = ""
    object_type: str = "commit"
    tag_name: str = ""
    tagger: Identity | None = None
    message: str = ""
    type_name: str = "tag"

Attributes

Name Type Default Description
object_sha str "" SHA-1 hash of the tagged object
object_type str "commit" Type of tagged object
tag_name str "" Name of the tag
tagger Identity | None None Identity of who created the tag
message str "" Tag message (can be multi-line)
type_name str "tag" Object type identifier

Constructor

def __init__(
    self,
    object_sha: str = "",
    object_type: str = "commit",
    tag_name: str = "",
    tagger: Identity | None = None,
    message: str = "",
) -> None

Example:

tagger = Identity.now("Alice", "alice@example.com")
tag = Tag(
    object_sha="abc123...",
    object_type="commit",
    tag_name="v1.0.0",
    tagger=tagger,
    message="Version 1.0.0 - First stable release"
)

Methods

serialize()

def serialize(self) -> bytes

Serialize tag to bytes.

Returns: Binary representation of the tag.

deserialize()

@classmethod
def deserialize(cls, data: bytes) -> Self

Parse tag from bytes.

Parameters: - data - Binary tag data (without header).

Returns: A new Tag instance.


Module-Level Functions

parse_object()

def parse_object(data: bytes) -> tuple[str, GitObject]

Parse a complete Git object (with header).

Takes raw object data including the header and returns the SHA-1 hash and deserialized object.

Parameters: - data - Complete object data with header: <type> <size>\0<content>

Returns: Tuple of (sha, object) where sha is the 40-char hex hash.

Raises: - ValueError - If the object type is unknown or the size declared in the header does not match the actual content length.

Example:

# Read raw object data from disk
# (a literal stands in here for data loaded from storage: header + content)
raw_data = b"blob 6\x00hello\n"
sha, obj = parse_object(raw_data)
print(sha)  # "ce013625030ba8dba906f756967f9e9ca394464a"
print(obj.data)  # b"hello\n"

create_object_data()

def create_object_data(obj: GitObject) -> bytes

Create complete Git object data (with header) from object.

Parameters: - obj - A GitObject instance to serialize.

Returns: Complete object data ready for storage: <type> <size>\0<content>

Example:

blob = Blob(data=b"hello\n")
data = create_object_data(blob)
print(data)  # b"blob 6\x00hello\n"

OBJECT_TYPES

OBJECT_TYPES: dict[str, type[GitObject]] = {
    "blob": Blob,
    "tree": Tree,
    "commit": Commit,
    "tag": Tag,
}

Registry mapping type names to their classes.

Example:

obj_class = OBJECT_TYPES["blob"]
blob = obj_class.deserialize(b"content")

Type Hints

The module uses Python 3.12+ type hints:

from typing import Self

# Type alias (PEP 695)
type SHA = str  # 40-character hex string
type RefName = str  # Reference name like "refs/heads/main"

Complete Example

from gitpy.objects import (
    Blob, Tree, TreeEntry, Commit, Tag, Identity,
    create_object_data, parse_object
)

# Create a blob from file contents
blob = Blob(data=b"print('Hello, World!')\n")
print(f"Blob OID: {blob.oid}")

# Create a tree with the blob
tree = Tree()
tree.add_entry("100755", "hello.py", blob.oid)
print(f"Tree OID: {tree.oid}")

# Create a commit
author = Identity.now("Alice", "alice@example.com", "-0700")
commit = Commit(
    tree_sha=tree.oid,
    parent_shas=[],  # Root commit
    author=author,
    committer=author,
    message="Initial commit"
)
print(f"Commit OID: {commit.oid}")
print(f"Is root commit: {commit.is_root}")

# Create an annotated tag
tag = Tag(
    object_sha=commit.oid,
    object_type="commit",
    tag_name="v1.0.0",
    tagger=author,
    message="First release"
)
print(f"Tag OID: {tag.oid}")

# Serialize and parse roundtrip
data = create_object_data(blob)
sha, restored = parse_object(data)
assert sha == blob.oid
assert restored.data == blob.data