Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/changes/changes.xml
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ The <action> type attribute can be add,update,fix,remove.
<!-- ADD -->
<action type="add" dev="ggregory" due-to="Inkeet, Gary Gregory, Wolff Bock von Wuelfingen" issue="CODEC-326">Add Base58 support.</action>
<action type="add" dev="ggregory" due-to="Gary Gregory">Add BaseNCodecInputStream.AbstracBuilder.setByteArray(byte[]).</action>
<action type="add" issue="CODEC-335" dev="pkarwasz" due-to="Piotr P. Karwasz">Add DigestUtils.gitBlob() and DigestUtils.gitTree() to compute Git blob and tree object identifiers.</action>
<!-- UPDATE -->
<action type="update" dev="ggregory" due-to="Gary Gregory">Bump org.apache.commons:commons-parent from 96 to 97.</action>
</release>
Expand Down
132 changes: 132 additions & 0 deletions src/main/java/org/apache/commons/codec/digest/DigestUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,17 +18,24 @@
package org.apache.commons.codec.digest;

import java.io.BufferedInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.charset.StandardCharsets;
import java.nio.file.DirectoryStream;
import java.nio.file.Files;
import java.nio.file.OpenOption;
import java.nio.file.Path;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.TreeSet;

import org.apache.commons.codec.binary.Hex;
import org.apache.commons.codec.binary.StringUtils;
Expand Down Expand Up @@ -139,6 +146,131 @@ public static byte[] digest(final MessageDigest messageDigest, final RandomAcces
return updateDigest(messageDigest, data).digest();
}

/**
 * Computes a generalized Git blob identifier for the content of a byte array.
 *
 * <p>The identifier is computed in the way described by the
 * <a href="https://www.swhid.org/swhid-specification/v1.2/5.Core_identifiers/#52-contents">SWHID contents identifier</a>, but it can use any hash
 * algorithm.</p>
 *
 * <p>When the hash algorithm is SHA-1, the identifier is identical to the Git blob identifier and the SWHID contents identifier.</p>
 *
 * @param messageDigest The MessageDigest to use (for example SHA-1).
 * @param data Data to digest.
 * @return A generalized Git blob identifier.
 * @since 1.22.0
 */
public static byte[] gitBlob(final MessageDigest messageDigest, final byte[] data) {
    // The digest input is the header "blob <length>\0" followed by the raw content.
    final byte[] header = gitBlobPrefix(data.length);
    updateDigest(messageDigest, header);
    return digest(messageDigest, data);
}

/**
 * Reads through a file and returns a generalized Git blob identifier for its content.
 *
 * <p>The identifier is computed in the way described by the
 * <a href="https://www.swhid.org/swhid-specification/v1.2/5.Core_identifiers/#52-contents">SWHID contents identifier</a>, but it can use any hash
 * algorithm.</p>
 *
 * <p>When the hash algorithm is SHA-1, the identifier is identical to the Git blob identifier and the SWHID contents identifier.</p>
 *
 * @param messageDigest The MessageDigest to use (for example SHA-1).
 * @param data The file whose content is digested.
 * @param options Options specifying how the file is opened.
 * @return A generalized Git blob identifier.
 * @throws IOException On error accessing the file.
 * @since 1.22.0
 */
public static byte[] gitBlob(final MessageDigest messageDigest, final Path data, final OpenOption... options) throws IOException {
    // The header carries the content length, so the file size is queried before the content is read.
    final long contentLength = Files.size(data);
    final byte[] header = gitBlobPrefix(contentLength);
    updateDigest(messageDigest, header);
    final MessageDigest updated = updateDigest(messageDigest, data, options);
    return updated.digest();
}

/**
 * Builds the header that precedes the content in the input of a Git blob hash.
 *
 * @param dataSize The size of the content in bytes.
 * @return The UTF-8 encoding of {@code "blob <dataSize>\0"}.
 */
private static byte[] gitBlobPrefix(final long dataSize) {
    final String header = new StringBuilder("blob ").append(dataSize).append('\0').toString();
    return header.getBytes(StandardCharsets.UTF_8);
}

/**
 * Returns a generalized Git tree identifier for a collection of directory entries.
 *
 * <p>The identifier is computed in the way described by the
 * <a href="https://www.swhid.org/swhid-specification/v1.2/5.Core_identifiers/#53-directories">SWHID directory identifier</a>, but it can use any hash
 * algorithm.</p>
 *
 * <p>When the hash algorithm is SHA-1, the identifier is identical to the Git tree identifier and the SWHID directory identifier.</p>
 *
 * @param messageDigest The MessageDigest to use (for example SHA-1).
 * @param entries The directory entries; they are sorted by their natural order before being hashed.
 * @return A generalized Git tree identifier.
 */
static byte[] gitTree(final MessageDigest messageDigest, final Collection<GitDirectoryEntry> entries) {
    // The header needs the total length of the encoded entries, so they are buffered first.
    final ByteArrayOutputStream body = new ByteArrayOutputStream();
    final TreeSet<GitDirectoryEntry> sortedEntries = new TreeSet<>(entries);
    for (final GitDirectoryEntry sortedEntry : sortedEntries) {
        final byte[] encoded = sortedEntry.toTreeEntryBytes();
        body.write(encoded, 0, encoded.length);
    }
    final byte[] payload = body.toByteArray();
    updateDigest(messageDigest, gitTreePrefix(payload.length));
    final MessageDigest updated = updateDigest(messageDigest, payload);
    return updated.digest();
}

/**
 * Walks a directory recursively and returns a generalized Git tree identifier for it.
 *
 * <p>The identifier is computed in the way described by the
 * <a href="https://www.swhid.org/swhid-specification/v1.2/5.Core_identifiers/#53-directories">SWHID directory identifier</a>, but it can use any hash
 * algorithm.</p>
 *
 * <p>When the hash algorithm is SHA-1, the identifier is identical to the Git tree identifier and the SWHID directory identifier.</p>
 *
 * <p>Each child of the directory contributes one tree entry:</p>
 * <ul>
 * <li>a sub-directory is hashed recursively as a tree,</li>
 * <li>a symbolic link is hashed as a blob whose content is the UTF-8 encoded link target; the link is not followed,</li>
 * <li>any other file is hashed as a blob of its content.</li>
 * </ul>
 *
 * @param messageDigest The MessageDigest to use (for example SHA-1).
 * @param data The directory to digest.
 * @param options Options specifying how regular files are opened.
 * @return A generalized Git tree identifier.
 * @throws IOException On error accessing the directory or one of its children.
 * @since 1.22.0
 */
public static byte[] gitTree(final MessageDigest messageDigest, final Path data, final OpenOption... options) throws IOException {
    final List<GitDirectoryEntry> entries = new ArrayList<>();
    try (DirectoryStream<Path> files = Files.newDirectoryStream(data)) {
        for (final Path path : files) {
            final GitDirectoryEntry.Type type = getGitDirectoryEntryType(path);
            final byte[] rawObjectId;
            if (type == GitDirectoryEntry.Type.DIRECTORY) {
                // Child identifiers are fully computed before this directory feeds the digest, so sharing it is safe.
                rawObjectId = gitTree(messageDigest, path, options);
            } else if (type == GitDirectoryEntry.Type.SYMBOLIC_LINK) {
                // Git stores the link target text as the blob content. Opening the path would follow the link and
                // hash the target's content instead, or fail for dangling links and links to directories.
                final byte[] linkTarget = Files.readSymbolicLink(path).toString().getBytes(StandardCharsets.UTF_8);
                rawObjectId = gitBlob(messageDigest, linkTarget);
            } else {
                rawObjectId = gitBlob(messageDigest, path, options);
            }
            entries.add(new GitDirectoryEntry(path, type, rawObjectId));
        }
    }
    return gitTree(messageDigest, entries);
}

/**
 * Classifies a file as one of the {@link GitDirectoryEntry.Type} values.
 *
 * <p>The checks are applied in this order: symbolic link, directory, executable file; anything else is a regular file.</p>
 *
 * @param path The file to check.
 * @return A {@link GitDirectoryEntry.Type}
 */
private static GitDirectoryEntry.Type getGitDirectoryEntryType(final Path path) {
    final GitDirectoryEntry.Type type;
    if (Files.isSymbolicLink(path)) {
        // Must be tested first: the checks below follow links and would classify the link by its target.
        type = GitDirectoryEntry.Type.SYMBOLIC_LINK;
    } else if (Files.isDirectory(path)) {
        type = GitDirectoryEntry.Type.DIRECTORY;
    } else if (Files.isExecutable(path)) {
        type = GitDirectoryEntry.Type.EXECUTABLE;
    } else {
        type = GitDirectoryEntry.Type.REGULAR;
    }
    return type;
}

/**
 * Builds the header that precedes the encoded entries in the input of a Git tree hash.
 *
 * @param dataSize The total size of the encoded entries in bytes.
 * @return The UTF-8 encoding of {@code "tree <dataSize>\0"}.
 */
private static byte[] gitTreePrefix(final long dataSize) {
    final String header = new StringBuilder("tree ").append(dataSize).append('\0').toString();
    return header.getBytes(StandardCharsets.UTF_8);
}

/**
* Gets a {@code MessageDigest} for the given {@code algorithm}.
*
Expand Down
183 changes: 183 additions & 0 deletions src/main/java/org/apache/commons/codec/digest/GitDirectoryEntry.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.commons.codec.digest;

import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.util.Objects;

/**
 * Represents a single entry in a Git tree object.
 *
 * <p>A Git tree object encodes a directory snapshot. Each entry holds:</p>
 * <ul>
 * <li>a {@link Type} that determines the Unix file mode (e.g. {@code 100644} for a regular file),</li>
 * <li>the entry name (file or directory name, without a path separator),</li>
 * <li>the raw object id of the referenced blob or sub-tree.</li>
 * </ul>
 *
 * <p>Entries are ordered by {@link #compareTo} using Git's tree-sort rule: names are compared as unsigned bytes of their UTF-8 encoding, and directory names
 * are compared as if they ended with {@code '/'}. For example, a directory {@code foo} sorts after a file {@code foo.txt} (because {@code '.'} is less than
 * {@code '/'}), although the plain name {@code foo} would sort before it.</p>
 *
 * <p>Note that {@link #equals(Object)} and {@link #hashCode()} only consider the entry name, whereas the ordering also depends on whether the entry is a
 * directory.</p>
 *
 * <p>Call {@link #toTreeEntryBytes()} to obtain the binary encoding that Git feeds to its hash function when computing the tree object identifier.</p>
 *
 * @see <a href="https://git-scm.com/book/en/v2/Git-Internals-Git-Objects">Git Internals – Git Objects</a>
 * @see <a href="https://www.swhid.org/swhid-specification/v1.2/5.Core_identifiers/#53-directories">SWHID Directory Identifier</a>
 */
class GitDirectoryEntry implements Comparable<GitDirectoryEntry> {

    /**
     * The entry name (file or directory name, no path separator).
     */
    private final String name;

    /**
     * The key used for ordering entries within a tree object: the UTF-8 encoded name.
     *
     * <p>Git appends {@code '/'} to directory names before comparing.</p>
     */
    private final byte[] sortKey;

    /**
     * The Git object type, which determines the Unix file-mode prefix.
     */
    private final Type type;

    /**
     * The raw object id of the referenced blob or sub-tree.
     */
    private final byte[] rawObjectId;

    /**
     * Extracts the last name element of a path.
     *
     * @param path The path of the entry
     * @return The file name as a string
     * @throws IllegalArgumentException If the path has no name elements
     */
    private static String getFileName(final Path path) {
        final Path fileName = path.getFileName();
        if (fileName == null) {
            throw new IllegalArgumentException(path.toString());
        }
        return fileName.toString();
    }

    /**
     * Creates an entry
     *
     * @param name The name of the entry
     * @param type The type of the entry
     * @param rawObjectId The id of the entry
     */
    private GitDirectoryEntry(final String name, final Type type, final byte[] rawObjectId) {
        this.name = name;
        this.type = type;
        // Encode once: Git orders tree entries by the bytes of the name, not by UTF-16 code units.
        this.sortKey = (type == Type.DIRECTORY ? name + "/" : name).getBytes(StandardCharsets.UTF_8);
        this.rawObjectId = rawObjectId;
    }

    /**
     * Creates an entry
     *
     * @param path The path of the entry; must not be an empty path
     * @param type The type of the entry
     * @param rawObjectId The id of the entry
     * @throws IllegalArgumentException If the path is empty
     * @throws NullPointerException If any argument is {@code null}
     */
    GitDirectoryEntry(final Path path, final Type type, final byte[] rawObjectId) {
        this(getFileName(path), Objects.requireNonNull(type), Objects.requireNonNull(rawObjectId));
    }

    /**
     * Returns the binary encoding of this entry as it appears inside a Git tree object.
     *
     * <p>The format follows the Git tree entry layout:</p>
     * <pre>
     * &lt;mode&gt; SP &lt;name&gt; NUL &lt;raw-object-id&gt;
     * </pre>
     *
     * <p>The raw object id is written as given to the constructor; it is 20 bytes long when SHA-1 is used.</p>
     *
     * @return the binary tree-entry encoding; never {@code null}
     */
    byte[] toTreeEntryBytes() {
        final byte[] nameBytes = name.getBytes(StandardCharsets.UTF_8);
        // Two extra bytes: the space after the mode and the NUL after the name.
        final byte[] result = new byte[type.mode.length + nameBytes.length + rawObjectId.length + 2];
        int offset = 0;
        System.arraycopy(type.mode, 0, result, offset, type.mode.length);
        offset += type.mode.length;
        result[offset++] = ' ';
        System.arraycopy(nameBytes, 0, result, offset, nameBytes.length);
        offset += nameBytes.length;
        result[offset++] = '\0';
        System.arraycopy(rawObjectId, 0, result, offset, rawObjectId.length);
        return result;
    }

    /**
     * Compares two entries by the unsigned byte values of their sort keys.
     *
     * <p>{@link String#compareTo(String)} is not used because it orders by UTF-16 code units, which disagrees with the byte order of the UTF-8 encoding when
     * supplementary characters are involved.</p>
     *
     * @param o The entry to compare to
     * @return a negative, zero or positive value if this entry sorts before, equal to or after {@code o}
     */
    @Override
    public int compareTo(final GitDirectoryEntry o) {
        final byte[] left = sortKey;
        final byte[] right = o.sortKey;
        final int common = Math.min(left.length, right.length);
        for (int i = 0; i < common; i++) {
            final int diff = (left[i] & 0xFF) - (right[i] & 0xFF);
            if (diff != 0) {
                return diff;
            }
        }
        // One key is a prefix of the other: the shorter one sorts first.
        return left.length - right.length;
    }

    @Override
    public int hashCode() {
        return name.hashCode();
    }

    @Override
    public boolean equals(final Object obj) {
        if (obj == this) {
            return true;
        }
        if (!(obj instanceof GitDirectoryEntry)) {
            return false;
        }
        final GitDirectoryEntry other = (GitDirectoryEntry) obj;
        return name.equals(other.name);
    }

    /**
     * The type of a Git tree entry, which maps to a Unix file-mode string.
     *
     * <p>Git encodes the file type and permission bits as an ASCII octal string that precedes the entry name in the binary tree format. The values defined here
     * cover the four entry types that Git itself produces.</p>
     *
     * <p>This enum is package-private. If it were made public, {@link #mode} would need to be wrapped in an immutable copy to prevent external mutation.</p>
     */
    enum Type {

        /**
         * A sub-directory (Git sub-tree)
         */
        DIRECTORY("40000"),

        /**
         * An executable file
         */
        EXECUTABLE("100755"),

        /**
         * A regular (non-executable) file
         */
        REGULAR("100644"),

        /**
         * A symbolic link
         */
        SYMBOLIC_LINK("120000");

        /**
         * The ASCII-encoded octal mode string as it appears in the binary tree entry.
         */
        private final byte[] mode;

        Type(final String mode) {
            this.mode = mode.getBytes(StandardCharsets.US_ASCII);
        }
    }
}
Loading
Loading