001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018package org.apache.commons.codec.binary; 019 020import java.io.UnsupportedEncodingException; 021import java.nio.ByteBuffer; 022import java.nio.charset.Charset; 023import java.nio.charset.StandardCharsets; 024 025import org.apache.commons.codec.CharEncoding; 026 027/** 028 * Converts String to and from bytes using the encodings required by the Java specification. These encodings are 029 * specified in standard {@link Charset}. 030 * 031 * <p>This class is immutable and thread-safe.</p> 032 * 033 * @see CharEncoding 034 * @see Charset 035 * @see StandardCharsets 036 * @since 1.4 037 */ 038public class StringUtils { 039 040 /** 041 * <p> 042 * Compares two CharSequences, returning {@code true} if they represent equal sequences of characters. 043 * </p> 044 * 045 * <p> 046 * {@code null}s are handled without exceptions. Two {@code null} references are considered to be equal. 047 * The comparison is case sensitive. 048 * </p> 049 * 050 * <pre> 051 * StringUtils.equals(null, null) = true 052 * StringUtils.equals(null, "abc") = false 053 * StringUtils.equals("abc", null) = false 054 * StringUtils.equals("abc", "abc") = true 055 * StringUtils.equals("abc", "ABC") = false 056 * </pre> 057 * 058 * <p> 059 * Copied from Apache Commons Lang r1583482 on April 10, 2014 (day of 3.3.2 release). 060 * </p> 061 * 062 * @see Object#equals(Object) 063 * @param cs1 064 * the first CharSequence, may be {@code null} 065 * @param cs2 066 * the second CharSequence, may be {@code null} 067 * @return {@code true} if the CharSequences are equal (case-sensitive), or both {@code null} 068 * @since 1.10 069 */ 070 public static boolean equals(final CharSequence cs1, final CharSequence cs2) { 071 if (cs1 == cs2) { 072 return true; 073 } 074 if (cs1 == null || cs2 == null) { 075 return false; 076 } 077 if (cs1 instanceof String && cs2 instanceof String) { 078 return cs1.equals(cs2); 079 } 080 return cs1.length() == cs2.length() && CharSequenceUtils.regionMatches(cs1, false, 0, cs2, 0, cs1.length()); 081 } 082 083 /** 084 * Calls {@link String#getBytes(Charset)} 085 * 086 * @param string 087 * The string to encode (if null, return null). 088 * @param charset 089 * The {@link Charset} to encode the {@code String} 090 * @return the encoded bytes 091 */ 092 private static ByteBuffer getByteBuffer(final String string, final Charset charset) { 093 if (string == null) { 094 return null; 095 } 096 return ByteBuffer.wrap(string.getBytes(charset)); 097 } 098 099 /** 100 * Encodes the given string into a byte buffer using the UTF-8 charset, storing the result into a new byte 101 * array. 102 * 103 * @param string 104 * the String to encode, may be {@code null} 105 * @return encoded bytes, or {@code null} if the input string was {@code null} 106 * @throws NullPointerException 107 * Thrown if {@link StandardCharsets#UTF_8} is not initialized, which should never happen since it is 108 * required by the Java platform specification. 109 * @see Charset 110 * @see #getBytesUnchecked(String, String) 111 * @since 1.11 112 */ 113 public static ByteBuffer getByteBufferUtf8(final String string) { 114 return getByteBuffer(string, StandardCharsets.UTF_8); 115 } 116 117 /** 118 * Calls {@link String#getBytes(Charset)} 119 * 120 * @param string 121 * The string to encode (if null, return null). 122 * @param charset 123 * The {@link Charset} to encode the {@code String} 124 * @return the encoded bytes 125 */ 126 private static byte[] getBytes(final String string, final Charset charset) { 127 return string == null ? null : string.getBytes(charset); 128 } 129 130 /** 131 * Encodes the given string into a sequence of bytes using the ISO-8859-1 charset, storing the result into a new 132 * byte array. 133 * 134 * @param string 135 * the String to encode, may be {@code null} 136 * @return encoded bytes, or {@code null} if the input string was {@code null} 137 * @throws NullPointerException 138 * Thrown if {@link StandardCharsets#ISO_8859_1} is not initialized, which should never happen 139 * since it is required by the Java platform specification. 140 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException 141 * @see Charset 142 * @see #getBytesUnchecked(String, String) 143 */ 144 public static byte[] getBytesIso8859_1(final String string) { 145 return getBytes(string, StandardCharsets.ISO_8859_1); 146 } 147 148 149 /** 150 * Encodes the given string into a sequence of bytes using the named charset, storing the result into a new byte 151 * array. 152 * <p> 153 * This method catches {@link UnsupportedEncodingException} and rethrows it as {@link IllegalStateException}, which 154 * should never happen for a required charset name. Use this method when the encoding is required to be in the JRE. 155 * </p> 156 * 157 * @param string 158 * the String to encode, may be {@code null} 159 * @param charsetName 160 * The name of a required {@link java.nio.charset.Charset} 161 * @return encoded bytes, or {@code null} if the input string was {@code null} 162 * @throws IllegalStateException 163 * Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen for a 164 * required charset name. 165 * @see CharEncoding 166 * @see String#getBytes(String) 167 */ 168 public static byte[] getBytesUnchecked(final String string, final String charsetName) { 169 if (string == null) { 170 return null; 171 } 172 try { 173 return string.getBytes(charsetName); 174 } catch (final UnsupportedEncodingException e) { 175 throw StringUtils.newIllegalStateException(charsetName, e); 176 } 177 } 178 179 /** 180 * Encodes the given string into a sequence of bytes using the US-ASCII charset, storing the result into a new byte 181 * array. 182 * 183 * @param string 184 * the String to encode, may be {@code null} 185 * @return encoded bytes, or {@code null} if the input string was {@code null} 186 * @throws NullPointerException 187 * Thrown if {@link StandardCharsets#US_ASCII} is not initialized, which should never happen since it is 188 * required by the Java platform specification. 189 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException 190 * @see Charset 191 * @see #getBytesUnchecked(String, String) 192 */ 193 public static byte[] getBytesUsAscii(final String string) { 194 return getBytes(string, StandardCharsets.US_ASCII); 195 } 196 197 /** 198 * Encodes the given string into a sequence of bytes using the UTF-16 charset, storing the result into a new byte 199 * array. 200 * 201 * @param string 202 * the String to encode, may be {@code null} 203 * @return encoded bytes, or {@code null} if the input string was {@code null} 204 * @throws NullPointerException 205 * Thrown if {@link StandardCharsets#UTF_16} is not initialized, which should never happen since it is 206 * required by the Java platform specification. 207 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException 208 * @see Charset 209 * @see #getBytesUnchecked(String, String) 210 */ 211 public static byte[] getBytesUtf16(final String string) { 212 return getBytes(string, StandardCharsets.UTF_16); 213 } 214 215 /** 216 * Encodes the given string into a sequence of bytes using the UTF-16BE charset, storing the result into a new byte 217 * array. 218 * 219 * @param string 220 * the String to encode, may be {@code null} 221 * @return encoded bytes, or {@code null} if the input string was {@code null} 222 * @throws NullPointerException 223 * Thrown if {@link StandardCharsets#UTF_16BE} is not initialized, which should never happen since it is 224 * required by the Java platform specification. 225 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException 226 * @see Charset 227 * @see #getBytesUnchecked(String, String) 228 */ 229 public static byte[] getBytesUtf16Be(final String string) { 230 return getBytes(string, StandardCharsets.UTF_16BE); 231 } 232 233 /** 234 * Encodes the given string into a sequence of bytes using the UTF-16LE charset, storing the result into a new byte 235 * array. 236 * 237 * @param string 238 * the String to encode, may be {@code null} 239 * @return encoded bytes, or {@code null} if the input string was {@code null} 240 * @throws NullPointerException 241 * Thrown if {@link StandardCharsets#UTF_16LE} is not initialized, which should never happen since it is 242 * required by the Java platform specification. 243 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException 244 * @see Charset 245 * @see #getBytesUnchecked(String, String) 246 */ 247 public static byte[] getBytesUtf16Le(final String string) { 248 return getBytes(string, StandardCharsets.UTF_16LE); 249 } 250 251 /** 252 * Encodes the given string into a sequence of bytes using the UTF-8 charset, storing the result into a new byte 253 * array. 254 * 255 * @param string 256 * the String to encode, may be {@code null} 257 * @return encoded bytes, or {@code null} if the input string was {@code null} 258 * @throws NullPointerException 259 * Thrown if {@link StandardCharsets#UTF_8} is not initialized, which should never happen since it is 260 * required by the Java platform specification. 261 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException 262 * @see Charset 263 * @see #getBytesUnchecked(String, String) 264 */ 265 public static byte[] getBytesUtf8(final String string) { 266 return getBytes(string, StandardCharsets.UTF_8); 267 } 268 269 private static IllegalStateException newIllegalStateException(final String charsetName, 270 final UnsupportedEncodingException e) { 271 return new IllegalStateException(charsetName + ": " + e); 272 } 273 274 /** 275 * Constructs a new {@code String} by decoding the specified array of bytes using the given charset. 276 * 277 * @param bytes 278 * The bytes to be decoded into characters 279 * @param charset 280 * The {@link Charset} to encode the {@code String}; not {@code null} 281 * @return A new {@code String} decoded from the specified array of bytes using the given charset, 282 * or {@code null} if the input byte array was {@code null}. 283 * @throws NullPointerException 284 * Thrown if charset is {@code null} 285 */ 286 private static String newString(final byte[] bytes, final Charset charset) { 287 return bytes == null ? null : new String(bytes, charset); 288 } 289 290 /** 291 * Constructs a new {@code String} by decoding the specified array of bytes using the given charset. 292 * <p> 293 * This method catches {@link UnsupportedEncodingException} and re-throws it as {@link IllegalStateException}, which 294 * should never happen for a required charset name. Use this method when the encoding is required to be in the JRE. 295 * </p> 296 * 297 * @param bytes 298 * The bytes to be decoded into characters, may be {@code null} 299 * @param charsetName 300 * The name of a required {@link java.nio.charset.Charset} 301 * @return A new {@code String} decoded from the specified array of bytes using the given charset, 302 * or {@code null} if the input byte array was {@code null}. 303 * @throws IllegalStateException 304 * Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen for a 305 * required charset name. 306 * @see CharEncoding 307 * @see String#String(byte[], String) 308 */ 309 public static String newString(final byte[] bytes, final String charsetName) { 310 if (bytes == null) { 311 return null; 312 } 313 try { 314 return new String(bytes, charsetName); 315 } catch (final UnsupportedEncodingException e) { 316 throw StringUtils.newIllegalStateException(charsetName, e); 317 } 318 } 319 320 /** 321 * Constructs a new {@code String} by decoding the specified array of bytes using the ISO-8859-1 charset. 322 * 323 * @param bytes 324 * The bytes to be decoded into characters, may be {@code null} 325 * @return A new {@code String} decoded from the specified array of bytes using the ISO-8859-1 charset, or 326 * {@code null} if the input byte array was {@code null}. 327 * @throws NullPointerException 328 * Thrown if {@link StandardCharsets#ISO_8859_1} is not initialized, which should never happen 329 * since it is required by the Java platform specification. 330 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException 331 */ 332 public static String newStringIso8859_1(final byte[] bytes) { 333 return newString(bytes, StandardCharsets.ISO_8859_1); 334 } 335 336 /** 337 * Constructs a new {@code String} by decoding the specified array of bytes using the US-ASCII charset. 338 * 339 * @param bytes 340 * The bytes to be decoded into characters 341 * @return A new {@code String} decoded from the specified array of bytes using the US-ASCII charset, 342 * or {@code null} if the input byte array was {@code null}. 343 * @throws NullPointerException 344 * Thrown if {@link StandardCharsets#US_ASCII} is not initialized, which should never happen since it is 345 * required by the Java platform specification. 346 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException 347 */ 348 public static String newStringUsAscii(final byte[] bytes) { 349 return newString(bytes, StandardCharsets.US_ASCII); 350 } 351 352 /** 353 * Constructs a new {@code String} by decoding the specified array of bytes using the UTF-16 charset. 354 * 355 * @param bytes 356 * The bytes to be decoded into characters 357 * @return A new {@code String} decoded from the specified array of bytes using the UTF-16 charset 358 * or {@code null} if the input byte array was {@code null}. 359 * @throws NullPointerException 360 * Thrown if {@link StandardCharsets#UTF_16} is not initialized, which should never happen since it is 361 * required by the Java platform specification. 362 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException 363 */ 364 public static String newStringUtf16(final byte[] bytes) { 365 return newString(bytes, StandardCharsets.UTF_16); 366 } 367 368 /** 369 * Constructs a new {@code String} by decoding the specified array of bytes using the UTF-16BE charset. 370 * 371 * @param bytes 372 * The bytes to be decoded into characters 373 * @return A new {@code String} decoded from the specified array of bytes using the UTF-16BE charset, 374 * or {@code null} if the input byte array was {@code null}. 375 * @throws NullPointerException 376 * Thrown if {@link StandardCharsets#UTF_16BE} is not initialized, which should never happen since it is 377 * required by the Java platform specification. 378 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException 379 */ 380 public static String newStringUtf16Be(final byte[] bytes) { 381 return newString(bytes, StandardCharsets.UTF_16BE); 382 } 383 384 /** 385 * Constructs a new {@code String} by decoding the specified array of bytes using the UTF-16LE charset. 386 * 387 * @param bytes 388 * The bytes to be decoded into characters 389 * @return A new {@code String} decoded from the specified array of bytes using the UTF-16LE charset, 390 * or {@code null} if the input byte array was {@code null}. 391 * @throws NullPointerException 392 * Thrown if {@link StandardCharsets#UTF_16LE} is not initialized, which should never happen since it is 393 * required by the Java platform specification. 394 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException 395 */ 396 public static String newStringUtf16Le(final byte[] bytes) { 397 return newString(bytes, StandardCharsets.UTF_16LE); 398 } 399 400 /** 401 * Constructs a new {@code String} by decoding the specified array of bytes using the UTF-8 charset. 402 * 403 * @param bytes 404 * The bytes to be decoded into characters 405 * @return A new {@code String} decoded from the specified array of bytes using the UTF-8 charset, 406 * or {@code null} if the input byte array was {@code null}. 407 * @throws NullPointerException 408 * Thrown if {@link StandardCharsets#UTF_8} is not initialized, which should never happen since it is 409 * required by the Java platform specification. 410 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException 411 */ 412 public static String newStringUtf8(final byte[] bytes) { 413 return newString(bytes, StandardCharsets.UTF_8); 414 } 415 416}