blob: 40a794376233dc958046c799102055b33bccb3b4 [file] [log] [blame]
// Copyright 2016 The Vanadium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package io.v.todos.persistence.syncbase;
import com.google.common.math.LongMath;
import java.util.UUID;
public class IdGenerator {
// TODO(rosswang): Escape collection IDs to allow any character. A lot of this logic can be
// simplified if we can use any character.
// TODO(rosswang): Put this into Syncbase utils.
private final IdAlphabet mAlphabet;
/**
* the length we *would* need if the alphabet were uniform
*/
private final int mLongUniformRadixLength;
private final int mLongEncodedLength;
/**
* This is related to unsigned division.
* @see #transformToSignedDivision(long)
*/
private final int mMaxSafeMostSignificantDigit;
/**
* This is related to unsigned division.
* @see #transformToSignedDivision(long)
*/
private final long mMostSignificantUnit, mMaxSafeMostSignificantValue;
private static final double LOG_LONG = Long.SIZE * Math.log(2);
/**
* @param alphabet the alphabet of legal characters for IDs generated by this generator
* @param perturb whether or not to perturb generated IDs by a random perturbation to avoid
* collisions in distributed key generation. Pass {@code false} if this generator
* is used to generate a sort key that is a secondary key where ordering
* conflicts can be deterministically resolved by the primary key. This setting
* only applies to the {@code generate...} methods.
*/
public IdGenerator(IdAlphabet alphabet, boolean perturb) {
mAlphabet = alphabet;
mPerturb = perturb;
// Find the maximum number of digits we'd need to encode an unsigned long if the radix were
// uniform for the entire string.
// ceil(log_RADIX(Long.MAX + 1))
mLongUniformRadixLength = (int) Math.ceil(LOG_LONG / Math.log(alphabet.radix()));
mMostSignificantUnit = LongMath.pow(alphabet.radix(), mLongUniformRadixLength - 1);
mMaxSafeMostSignificantDigit = (int) (Long.MAX_VALUE / mMostSignificantUnit);
mMaxSafeMostSignificantValue = mMaxSafeMostSignificantDigit * mMostSignificantUnit;
// The first character may have additional restrictions, so we have less chars to work with
// for the first digit. If we still have enough digits, we'll proceed and just use a special
// mapping for the leading digit. Otherwise, we'll pad by a leading zero, thus making the
// remaining encoding the common case.
// So, what most-significant digit do we need to encode the max unsigned value?
PartialDivisionResult div = transformToSignedDivision(-1);
int maxMostSignficantDigit = div.partialMostSignificantDigit +
(int) (div.partialRemainder / mMostSignificantUnit);
mLongEncodedLength = maxMostSignficantDigit < alphabet.leadingRadix() ?
mLongUniformRadixLength : mLongUniformRadixLength + 1;
}
private static class PartialDivisionResult {
public int partialMostSignificantDigit;
public long partialRemainder;
public PartialDivisionResult(int partialMostSignificantDigit, long partialRemainder) {
this.partialMostSignificantDigit = partialMostSignificantDigit;
this.partialRemainder = partialRemainder;
}
}
/**
* Performs partial calculation of the most significant digit for the alphabet-radix
* representation of {@code unsigned}. To achieve unsigned division, we first assume the
* most significant digit is at least as large as the max safe most significant digit for signed
* division, add one more if neccessary to push the "remainder" positive, and then do normal
* division for the rest.
* <p>
* TODO(rosswang): Android N will introduce some Java 8 support. If this includes <a
* href="https://docs.oracle.com/javase/8/docs/api/java/lang/Long.html#divideUnsigned-long-long-">
* Long.divideUnsigned(long, long)</a> and it is backwards compatible, use that instead.
*/
private PartialDivisionResult transformToSignedDivision(long unsigned) {
if (unsigned < 0) {
long naiveRemainder = unsigned - mMaxSafeMostSignificantValue;
return naiveRemainder < 0 ?
// need one more
new PartialDivisionResult(mMaxSafeMostSignificantDigit + 1,
naiveRemainder - mMostSignificantUnit) :
new PartialDivisionResult(mMaxSafeMostSignificantDigit, naiveRemainder);
} else {
// Already positive; handle with normal arithmetic.
return new PartialDivisionResult(0, unsigned);
}
}
/**
* Converts a signed {@code long} to a {@code String} consistent with the pattern
* {@code [0-9A-Za-z][0-9A-Za-z_]*}, required by Syncbase for
* {@link io.v.v23.services.syncbase.Id} names, with lexicographic ordering consistent with the
* numeric ordering.
* <p>
* Example transformations using {@link IdAlphabets#COLLECTION_ID}:
* <pre>
* -9223372036854775808 -> 00000000000
* -9223372036854775807 -> 00000000001
* -9223372036854775806 -> 00000000002
* -1 -> 9MxfBQuKjH7
* 0 -> 9MxfBQuKjH8
* 1 -> 9MxfBQuKjH9
* 9223372036854775805 -> IivLMqoeTYD
* 9223372036854775806 -> IivLMqoeTYE
* 9223372036854775807 -> IivLMqoeTYF
* </pre>
*/
public String longToPaddedIdentifier(long x) {
StringBuilder s = new StringBuilder(mLongEncodedLength);
long unsignedWithSameOrdering = x - Long.MIN_VALUE;
PartialDivisionResult signedSubproblem =
transformToSignedDivision(unsignedWithSameOrdering);
long leftToEncode = signedSubproblem.partialRemainder;
for (int placeValue = 0; placeValue < mLongUniformRadixLength - 1; placeValue++) {
if (leftToEncode > 0) {
s.append(mAlphabet.encodeDigit((int) (leftToEncode % mAlphabet.radix())));
leftToEncode /= mAlphabet.radix();
} else {
s.append(mAlphabet.encodeDigit(0));
}
}
// The most-significant digit needs to be handled specially, accounting for the signed
// division adjustment and using a possibly different character mapping.
int mostSignificantDigit = signedSubproblem.partialMostSignificantDigit +
(int) leftToEncode;
if (mLongEncodedLength > mLongUniformRadixLength) {
// just 0-pad
s.append(mAlphabet.encodeDigit(mostSignificantDigit))
.append(mAlphabet.encodeLeadingDigit(0));
} else {
// use the special leading-digit mapping
s.append(mAlphabet.encodeLeadingDigit(mostSignificantDigit));
}
return s.reverse().toString();
}
/**
* Perturbs IDs to mitigate collision during distributed, eventually consistent insertion.
* {@link UUID#randomUUID()} is used for perturbation. The perturbed ID is strictly greater than
* the base ID but less than the next increment from the base ID except in the degenerate case
* where the base ID must be extended to increment.
*/
public String perturb(String baseId) {
UUID uuid = UUID.randomUUID();
return baseId +
longToPaddedIdentifier(uuid.getMostSignificantBits()) +
longToPaddedIdentifier(uuid.getLeastSignificantBits());
}
/**
* Increments an ID with a proposed base length. If the increment cannot be done under these
* constraints, the ID is extended. The incremented ID is guaranteed to be greater than the
* given {@code fullId}.
* <p>
* Example transformations using {@link IdAlphabets#COLLECTION_ID} with {@code proposedCutoff =
* 2}:
* <pre>
* 0000 -> 01
* A -> A0
* AZ00 -> A_
* Az00 -> B0
* Zz00 -> a0
* zz00 -> zz1
* zzz0 -> zzz1
* zzzz -> zzzz0
* </pre>
* <p>
* Although in most cases only the characters prior to {@code proposedCutoff} will be used,
* additional characters past the cutoff may be needed if the prefix cannot be incremented.
* (e.g. 999 in the [0-9]+ alphabet)
* <p>
* {@code proposedCutoff} should be set in a way similar to the index bit depth (typically
* integer, though we default to longs here to be compatible with timestamps). Setting it too
* high can result in unnecessarily long strings with unused prefixes while setting it too low
* can result in O(n) length growth as most-significant characters are greedily exhausted. If it
* is greater than the length of {@code fullId}, the ID is extended by the alphabet's 0
* character as the method of incrementation.
*
* @param fullId the full ID to be incremented
* @param proposedCutoff the preferred length of the ID to keep after incrementing
*/
public String increment(String fullId, int proposedCutoff) {
StringBuilder mutable = new StringBuilder(fullId);
if (proposedCutoff <= fullId.length()) {
// First try normal incrementing
for (int i = proposedCutoff - 1; i > 0; i--) {
int newValue = mAlphabet.decodeDigit(mutable.charAt(i)) + 1;
if (newValue < mAlphabet.radix()) {
mutable.setCharAt(i, mAlphabet.encodeDigit(newValue));
mutable.setLength(proposedCutoff);
return mutable.toString();
} else {
mutable.setCharAt(i, mAlphabet.encodeDigit(0));
}
}
// Process the leading digit specially
if (proposedCutoff > 0) {
int newValue = mAlphabet.decodeLeadingDigit(mutable.charAt(0)) + 1;
if (newValue < mAlphabet.leadingRadix()) {
mutable.setCharAt(0, mAlphabet.encodeLeadingDigit(newValue));
mutable.setLength(proposedCutoff);
return mutable.toString();
}
}
// If we've gotten to this point, we can't increment the prefix; start looking at the
// suffix.
// Reset mutable
mutable.setLength(0);
mutable.append(fullId);
for (int i = proposedCutoff; i < fullId.length(); i++) {
int newValue = mAlphabet.decodeDigit(mutable.charAt(i)) + 1;
if (newValue < mAlphabet.radix()) {
mutable.setCharAt(i, mAlphabet.encodeDigit(newValue));
mutable.setLength(i + 1);
return mutable.toString();
}
}
}
// By this point, we just have to extend the string.
mutable.append(mAlphabet.encodeDigit(0));
return mutable.toString();
}
public String increment(String previous) {
return increment(previous, mLongEncodedLength);
}
private final Object mLargestKnownIdMutex = new Object();
private String mLargestKnownId;
private final boolean mPerturb;
public String generateTailId() {
String candidate = longToPaddedIdentifier(System.currentTimeMillis());
synchronized (mLargestKnownIdMutex) {
if (mLargestKnownId != null && candidate.compareTo(mLargestKnownId) <= 0) {
candidate = increment(mLargestKnownId);
}
return mLargestKnownId = mPerturb ? perturb(candidate) : candidate;
}
}
public void registerId(String id) {
synchronized (mLargestKnownIdMutex) {
if (mLargestKnownId == null || id.compareTo(mLargestKnownId) > 0) {
mLargestKnownId = id;
}
}
}
}