CharsKit.java
package space.sunqian.fs.base.chars;
import space.sunqian.annotation.Nonnull;
import space.sunqian.annotation.Nullable;
import space.sunqian.fs.Fs;
import space.sunqian.fs.base.system.SystemKeys;
import space.sunqian.fs.io.IORuntimeException;
import java.io.IOException;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
/**
 * Utilities for {@code char}, {@code char array} and {@link Charset}.
 * <p>
 * This class only exposes static members and cannot be instantiated.
 *
 * @author sunqian
 */
public class CharsKit {

    /**
     * Charset: UTF-8.
     */
    public static final @Nonnull Charset UTF_8 = StandardCharsets.UTF_8;

    /**
     * Charset: ISO-8859-1.
     */
    public static final @Nonnull Charset ISO_8859_1 = StandardCharsets.ISO_8859_1;

    // Shared zero-length instances handed out by empty() and emptyBuffer().
    private static final char @Nonnull [] EMPTY = {};
    private static final @Nonnull CharBuffer EMPTY_BUFFER = CharBuffer.wrap(EMPTY);

    /**
     * Returns whether the given buffer is null or empty. A buffer is considered empty when it has no remaining
     * elements between its position and its limit.
     *
     * @param buffer the given buffer
     * @return whether the given buffer is null or empty
     */
    public static boolean isEmpty(@Nullable CharBuffer buffer) {
        return buffer == null || !buffer.hasRemaining();
    }

    /**
     * Returns an empty char array. The same shared instance is returned on every call.
     *
     * @return an empty char array
     */
    public static char @Nonnull [] empty() {
        return EMPTY;
    }

    /**
     * Returns an empty char buffer. The same shared, zero-capacity instance is returned on every call.
     *
     * @return an empty char buffer
     */
    public static @Nonnull CharBuffer emptyBuffer() {
        return EMPTY_BUFFER;
    }

    /**
     * Returns the default charset: {@link #UTF_8}.
     *
     * @return the default charset: {@link #UTF_8}
     */
    public static @Nonnull Charset defaultCharset() {
        return UTF_8;
    }

    /**
     * Returns the latin charset: {@link #ISO_8859_1}.
     *
     * @return the latin charset: {@link #ISO_8859_1}
     */
    public static @Nonnull Charset latinCharset() {
        return ISO_8859_1;
    }

    /**
     * If the {@link #nativeCharset()} is not {@code null}, returns {@link #nativeCharset()}, otherwise returns
     * {@link #jvmCharset()}.
     *
     * @return if the {@link #nativeCharset()} is not {@code null}, returns {@link #nativeCharset()}, otherwise returns
     * {@link #jvmCharset()}.
     */
    public static @Nonnull Charset localCharset() {
        return Fs.nonnull(nativeCharset(), jvmCharset());
    }

    /**
     * Returns the default charset of the JVM. It is equivalent to the {@link Charset#defaultCharset()}.
     *
     * @return the default charset of the JVM
     * @see Charset#defaultCharset()
     */
    public static @Nonnull Charset jvmCharset() {
        return Charset.defaultCharset();
    }

    /**
     * Returns the charset from the host environment, which is typically the charset of current OS.
     * <p>
     * This method is <b>not</b> equivalent to the {@link #jvmCharset()}, it will search the system properties in the
     * following order:
     * <ul>
     *     <li>native.encoding</li>
     *     <li>sun.jnu.encoding</li>
     *     <li>file.encoding</li>
     * </ul>
     * The first property that names an available charset wins. The lookup is performed once, the first time the
     * result is needed, and the outcome is cached. It may return {@code null} if not found.
     *
     * @return the charset from the host environment, which is typically the charset of current OS
     */
    public static @Nullable Charset nativeCharset() {
        return Natives.NATIVE_CHARSET;
    }

    /**
     * Returns the charset with the specified name, may be {@code null} if searching fails. Searching fails when the
     * name is {@code null}, is not a legal charset name, or names a charset that is not available in this JVM.
     *
     * @param name the specified name, may be {@code null}
     * @return the charset with the specified name, or {@code null} if searching fails
     */
    public static @Nullable Charset charset(@Nullable String name) {
        // Answer the null case directly instead of provoking and swallowing an exception from Charset.forName.
        if (name == null) {
            return null;
        }
        try {
            return Charset.forName(name);
        } catch (Exception ignored) {
            // Best-effort lookup: an illegal or unsupported name simply means "no such charset".
            return null;
        }
    }

    /**
     * Converts the given char to the corresponding Unicode escape string, using uppercase hex letters. For example:
     * <ul>
     *     <li>{@code 'a'} -&gt; {@code \\u0061}</li>
     *     <li>{@code 'h'} -&gt; {@code \\u0068}</li>
     *     <li>{@code '中'} -&gt; {@code \\u4E2D}</li>
     * </ul>
     *
     * @param c the given char
     * @return the corresponding Unicode escape string
     */
    public static @Nonnull String toUnicode(char c) {
        return toUnicode(c, true);
    }

    /**
     * Converts the given char to the corresponding Unicode escape string. For example (with uppercase letters):
     * <ul>
     *     <li>{@code 'a'} -&gt; {@code \\u0061}</li>
     *     <li>{@code 'h'} -&gt; {@code \\u0068}</li>
     *     <li>{@code '中'} -&gt; {@code \\u4E2D}</li>
     * </ul>
     *
     * @param c         the given char
     * @param uppercase {@code true} to use uppercase letters, {@code false} to use lowercase letters
     * @return the corresponding Unicode escape string
     */
    public static @Nonnull String toUnicode(char c, boolean uppercase) {
        return CharToUnicode.charToUnicode(c, uppercase);
    }

    /**
     * Converts the given char to the corresponding Unicode escape string and appends it to the given appender. For
     * example (with uppercase letters):
     * <ul>
     *     <li>{@code 'a'} -&gt; {@code \\u0061}</li>
     *     <li>{@code 'h'} -&gt; {@code \\u0068}</li>
     *     <li>{@code '中'} -&gt; {@code \\u4E2D}</li>
     * </ul>
     *
     * @param c         the given char
     * @param uppercase {@code true} to use uppercase letters, {@code false} to use lowercase letters
     * @param appender  the appender to append the result
     * @throws IORuntimeException if appending to the given appender fails
     */
    public static void toUnicode(char c, boolean uppercase, @Nonnull Appendable appender) throws IORuntimeException {
        try {
            CharToUnicode.charToUnicode(c, uppercase, appender);
        } catch (Exception e) {
            // Deliberately broad: every failure raised while appending is reported as an IORuntimeException.
            throw new IORuntimeException(e);
        }
    }

    /**
     * Converts the four hex digits of a Unicode escape sequence to the corresponding character. Both uppercase and
     * lowercase hex letters are accepted. For example:
     * <ul>
     *     <li>{@code \\u0061} -&gt; {@code 'a'}</li>
     *     <li>{@code \\u0068} -&gt; {@code 'h'}</li>
     *     <li>{@code \\u4E2D} -&gt; {@code '中'}</li>
     * </ul>
     *
     * @param c1 the first character of the Unicode escape sequence
     * @param c2 the second character of the Unicode escape sequence
     * @param c3 the third character of the Unicode escape sequence
     * @param c4 the fourth character of the Unicode escape sequence
     * @return the corresponding character
     * @throws IllegalArgumentException if any of the given characters is not a hex digit ({@code 0-9}, {@code A-F}
     *                                  or {@code a-f})
     */
    public static char unicodeToChar(char c1, char c2, char c3, char c4) {
        // Digits are validated left to right, so the first illegal character is the one reported.
        int code = unicodeToDigits(c1);
        code = (code << 4) | unicodeToDigits(c2);
        code = (code << 4) | unicodeToDigits(c3);
        code = (code << 4) | unicodeToDigits(c4);
        return (char) code;
    }

    // Maps one ASCII hex digit to its value in the range 0..15, rejecting every other character.
    private static int unicodeToDigits(char c) {
        if (c >= '0' && c <= '9') {
            return c - '0';
        }
        if (c >= 'A' && c <= 'F') {
            return c - 'A' + 10;
        }
        if (c >= 'a' && c <= 'f') {
            return c - 'a' + 10;
        }
        throw new IllegalArgumentException("Illegal hex character: " + c);
    }

    // Holder class: the system-property lookup runs once, when NATIVE_CHARSET is first read.
    private static final class Natives {

        private static final @Nullable Charset NATIVE_CHARSET = searchNativeCharset();

        private static @Nullable Charset searchNativeCharset() {
            return search(
                SystemKeys.NATIVE_ENCODING,
                "sun.jnu.encoding",
                SystemKeys.FILE_ENCODING
            );
        }

        // Returns the charset named by the first given system property that resolves, or null if none does.
        @SuppressWarnings("SameParameterValue")
        private static @Nullable Charset search(@Nonnull String @Nonnull ... proName) {
            for (String key : proName) {
                String value = System.getProperty(key);
                if (value == null) {
                    // Property is not set on this JVM, try the next candidate.
                    continue;
                }
                Charset found = charset(value);
                if (found != null) {
                    return found;
                }
            }
            return null;
        }
    }

    // Encoder for the six-character escape form: a backslash, the letter 'u' and four hex digits.
    private static final class CharToUnicode {

        private static final char[] UPPERS =
            {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'};
        private static final char[] LOWERS =
            {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'};

        // Builds the escape sequence of the given char as a new six-character string.
        private static String charToUnicode(char ch, boolean uppercase) {
            char[] dict = uppercase ? UPPERS : LOWERS;
            // A char is an unsigned 16-bit value, so each 4-bit group can be extracted without extra masking.
            char[] result = {
                '\\',
                'u',
                dict[(ch >>> 12) & 0x0F],
                dict[(ch >>> 8) & 0x0F],
                dict[(ch >>> 4) & 0x0F],
                dict[ch & 0x0F]
            };
            return new String(result);
        }

        // Appends the escape sequence of the given char to the appender, one character at a time.
        private static void charToUnicode(
            char ch, boolean uppercase, @Nonnull Appendable appender
        ) throws IOException {
            char[] dict = uppercase ? UPPERS : LOWERS;
            appender
                .append('\\')
                .append('u')
                .append(dict[(ch >>> 12) & 0x0F])
                .append(dict[(ch >>> 8) & 0x0F])
                .append(dict[(ch >>> 4) & 0x0F])
                .append(dict[ch & 0x0F]);
        }
    }

    private CharsKit() {
    }
}