数据结构-串

串，字符串是由零个或多个字符组成的有限序列。字符的数目称为串的长度。长度为零的串称为空串。串中任意个连续的字符组成的子序列称为该串的子串。包含子串的串称为主串。字符在串中的序号称为该字符在串中的位置。

串相等：只有长度相同、且各个对应位置上的字符都相同的两个串才是相等的串。

对于串的操作很多，在这里仅列出以下几种：

1、串比较

equals() 判断串是否相等。

compareTo() 比较串的大小。

compareToIgnoreCase(String str) 忽略字母大小写比较串的大小。

== 判断两个串变量引用的是否为同一个对象（即地址是否相同），而不是比较串的内容。

equalsIgnoreCase() 忽略字母大小写判断串是否相等。

regionMatches() 比较两个串中指定区域的内容是否相同。

2、串查找

charAt(int index) 返回指定位置上的字符，索引范围从0开始。

indexOf(String str) 从0开始检索str，并返回第一次出现的位置，未出现返回-1。

indexOf(String str, int fromIndex) 从指定位置开始检索str，并返回第一次出现的位置，未出现返回-1。

lastIndexOf(String str) 查找最后一次出现的位置。

lastIndexOf(String str, int fromIndex) 从指定位置开始反向（向串首方向）检索，返回str最后一次出现的位置，未出现返回-1。

startsWith(String prefix, int toffset) 测试从指定索引开始的子串是否以指定前缀开始。

startsWith(String prefix) 测试是否以指定的前缀开始。

endsWith(String suffix) 测试是否以指定的后缀结束。

3、字符串截取

public String substring(int beginIndex) 返回从beginIndex开始到串尾的子串（它是一个新串；beginIndex为0时返回原串本身）。

public String substring(int beginIndex, int endIndex) 返回从beginIndex开始到endIndex-1的子串。

4、字符串替换

public String replace(char oldChar, char newChar) 把串中所有的oldChar字符替换为newChar，返回新串。

public String replace(CharSequence target, CharSequence replacement) 把串中所有的target子序列替换为replacement序列，返回新串。

public String replaceAll(String regex, String replacement) 把串中所有与正则表达式regex匹配的子串替换为replacement，返回新串。

5、其他

public String trim() 去除串首尾的空白字符（编码不大于空格的字符），串中间的空格不受影响。

public String toLowerCase() 将串中的所有大写字符改写为小写字符。

public String toUpperCase() 将串中的所有小写字符改写为大写字符。

public String[] split(String regex) 把串按指定的分隔符（regex是正则表达式，可以是单个字符或字符串）对内容进行分割，并将分割后的结果存放在字符串数组中。

接下来贴出个人认为有助于理解的部分 Java 1.8 中 String.java 的源代码，希望有助于理解。

public final class String

implements java.io.Serializable, Comparable<String>, CharSequence {

/** The value is used for character storage. */

private final char value[];

/**
 * Creates an empty string, i.e. one that represents a character sequence of
 * length zero. Since strings are immutable, calling this constructor is
 * rarely necessary.
 */
public String() {
    // A zero-length backing array is the entire state of an empty string.
    this.value = new char[] {};
}

/** Allocates a new {@code String} so that it represents the sequence of characters currently contained in the character array argument. The contents of the character array are copied; subsequent modification of the character array does not affect the newly created string. */

public String(char value[]) {

this.value = Arrays.copyOf(value, value.length);

}

public String(byte bytes[], int offset, int length, Charset charset) {

if (charset == null)

throw new NullPointerException("charset");

checkBounds(bytes, offset, length);

this.value = StringCoding.decode(charset, bytes, offset, length);

}

字符编码的部分很有意思，但不属于数据结构的内容，这里先略过。

/**
 * Reports the length of this string, measured in {@code char} values
 * (Unicode code units).
 *
 * @return the number of {@code char}s stored in this string
 */
public int length() {
    final char[] chars = value;
    return chars.length;
}

/**
 * Tells whether this string contains no characters.
 *
 * @return {@code true} if, and only if, {@link #length()} is {@code 0}
 */
public boolean isEmpty() {
    final int count = value.length;
    return count == 0;
}

/**
 * Fetches the {@code char} stored at the given position. Valid positions
 * run from {@code 0} (the first character) to {@code length() - 1}, exactly
 * as for array indexing.
 *
 * @param index position of the wanted character
 * @return the {@code char} at {@code index}
 * @throws StringIndexOutOfBoundsException if {@code index} is negative or
 *         not less than the length of this string
 */
public char charAt(int index) {
    final char[] chars = value;
    final boolean inRange = index >= 0 && index < chars.length;
    if (!inRange) {
        throw new StringIndexOutOfBoundsException(index);
    }
    return chars[index];
}

/** Compares this string to the specified object. The result is true if and only if the argument is not null and is a String object that represents the same sequence of characters as this object. @see #compareTo(String) @see #equalsIgnoreCase(String) */

public boolean equals(Object anObject) {

if (this == anObject) {

return true;

}

if (anObject instanceof String) {①

String anotherString = (String)anObject;

int n = value.length;

if (n == anotherString.value.length) {②

char v1[] = value;

char v2[] = anotherString.value;

int i = 0;

while (n-- != 0) {③

if (v1[i] != v2[i])

return false;

i++;

}

return true;

}

return false;

}

/**
 * Compares this string with another string character by character.
 * <p>
 * The two strings are scanned over their common prefix length; at the first
 * position where the characters differ, the difference of those two
 * {@code char} values is returned. If no position differs, the difference
 * of the two lengths is returned.
 *
 * @param anotherString the string to compare with
 * @return a negative value, zero, or a positive value as this string is
 *         less than, equal to, or greater than {@code anotherString}
 */
public int compareTo(String anotherString) {
    final char[] mine = value;
    final char[] theirs = anotherString.value;
    final int shared = Math.min(mine.length, theirs.length);

    for (int pos = 0; pos < shared; pos++) {
        final char a = mine[pos];
        final char b = theirs[pos];
        if (a != b) {
            return a - b;
        }
    }
    // One string is a prefix of the other (or they are equal).
    return mine.length - theirs.length;
}

public int indexOf(int ch, int fromIndex) {

final int max = value.length;

if (fromIndex < 0) {

fromIndex = 0;

} else if (fromIndex >= max) {

// Note: fromIndex might be near -1>>>1.

return -1;

}

if (ch < Character.MIN_SUPPLEMENTARY_CODE_POINT) {

// handle most cases here (ch is a BMP code point or a

// negative value (invalid code point))

final char[] value = this.value;

for (int i = fromIndex; i < max; i++) {

if (value[i] == ch) {

return i;

}

return -1;

} else {

return indexOfSupplementary(ch, fromIndex);

}

public int lastIndexOf(int ch, int fromIndex) {

if (ch < Character.MIN_SUPPLEMENTARY_CODE_POINT) {

// handle most cases here (ch is a BMP code point or a

// negative value (invalid code point))

final char[] value = this.value;

int i = Math.min(fromIndex, value.length - 1);

for (; i >= 0; i--) {

if (value[i] == ch) {

return i;

}

return -1;

} else {

return lastIndexOfSupplementary(ch, fromIndex);

}

static int indexOf(char[] source, int sourceOffset, int sourceCount,

char[] target, int targetOffset, int targetCount,

int fromIndex) {

if (fromIndex >= sourceCount) {

return (targetCount == 0 ? sourceCount : -1);

}

if (fromIndex < 0) {

fromIndex = 0;

}

if (targetCount == 0) {

return fromIndex;

}

char first = target[targetOffset];

int max = sourceOffset + (sourceCount - targetCount);

for (int i = sourceOffset + fromIndex; i <= max; i++) {

/* Look for first character. */

if (source[i] != first) {

while (++i <= max && source[i] != first);

}

/* Found first character, now look at the rest of v2 */

if (i <= max) {

int j = i + 1;

int end = j + targetCount - 1;

for (int k = targetOffset + 1; j < end && source[j]

== target[k]; j++, k++);

if (j == end) {

/* Found whole string. */

return i - sourceOffset;

}

return -1;

}

static int lastIndexOf(char[] source, int sourceOffset, int sourceCount,

char[] target, int targetOffset, int targetCount,

int fromIndex) {

* Check arguments; return immediately where possible. For

* consistency, don‘t check for null str.

int rightIndex = sourceCount - targetCount;

if (fromIndex < 0) {

return -1;

}

if (fromIndex > rightIndex) {

fromIndex = rightIndex;

}

/* Empty string always matches. */

if (targetCount == 0) {

return fromIndex;

}

int strLastIndex = targetOffset + targetCount - 1;

char strLastChar = target[strLastIndex];

int min = sourceOffset + targetCount - 1;

int i = min + fromIndex;

startSearchForLastChar:

while (true) {

while (i >= min && source[i] != strLastChar) {

i--;

}

if (i < min) {

return -1;

}

int j = i - 1;

int start = j - (targetCount - 1);

int k = strLastIndex - 1;

while (j > start) {

if (source[j--] != target[k--]) {

i--;

continue startSearchForLastChar;

}

return start - sourceOffset + 1;

}

/**
 * Extracts the characters from {@code beginIndex} (inclusive) up to
 * {@code endIndex} (exclusive). When the requested range covers the whole
 * string, this very instance is returned instead of a copy.
 *
 * @param beginIndex index of the first character to include
 * @param endIndex   index just past the last character to include
 * @return the requested substring
 * @throws StringIndexOutOfBoundsException if {@code beginIndex} is
 *         negative, {@code endIndex} exceeds the length of this string, or
 *         {@code beginIndex} is greater than {@code endIndex}
 */
public String substring(int beginIndex, int endIndex) {
    if (beginIndex < 0) {
        throw new StringIndexOutOfBoundsException(beginIndex);
    }
    final int total = value.length;
    if (endIndex > total) {
        throw new StringIndexOutOfBoundsException(endIndex);
    }
    final int span = endIndex - beginIndex;
    if (span < 0) {
        throw new StringIndexOutOfBoundsException(span);
    }

    // The full range needs no copy.
    if (beginIndex == 0 && endIndex == total) {
        return this;
    }
    return new String(value, beginIndex, span);
}

/**
 * Appends the given string to the end of this one.
 * <p>
 * If {@code str} is empty this instance is returned unchanged; otherwise a
 * new backing array large enough for both strings is allocated and filled.
 *
 * @param str the string to append
 * @return this string when {@code str} is empty, else a string made of this
 *         string's characters followed by those of {@code str}
 */
public String concat(String str) {
    final int extra = str.length();
    if (extra == 0) {
        return this;
    }
    final int own = value.length;
    // Copy our characters into a bigger array, leaving room for str.
    final char[] joined = Arrays.copyOf(value, own + extra);
    str.getChars(joined, own);
    return new String(joined, true);
}

public String replace(char oldChar, char newChar) {

if (oldChar != newChar) {

int len = value.length;

int i = -1;

char[] val = value; /* avoid getfield opcode */

while (++i < len) {

if (val[i] == oldChar) {

break;

}

if (i < len) {

char buf[] = new char[len];

for (int j = 0; j < i; j++) {

buf[j] = val[j];

}

while (i < len) {

char c = val[i];

buf[i] = (c == oldChar) ? newChar : c;

i++;

}

return new String(buf, true);

}

return this;

}

/**
 * Tells whether the given character sequence occurs somewhere in this
 * string.
 *
 * @param s the sequence to look for
 * @return {@code true} if {@code indexOf} finds the sequence
 */
public boolean contains(CharSequence s) {
    final String needle = s.toString();
    return indexOf(needle) >= 0;
}

/**

* Returns a new String composed of copies of the

* {@code CharSequence elements} joined together with a copy of

* the specified {@code delimiter}.

* <blockquote>For example,

* <pre>{@code

* String message = String.join("-", "Java", "is", "cool");

* // message returned is: "Java-is-cool"

* }</pre></blockquote>

* Note that if an element is null, then {@code "null"} is added.

* @param delimiter the delimiter that separates each element

* @param elements the elements to join together.

* @return a new {@code String} that is composed of the {@code elements}

* separated by the {@code delimiter}

* @throws NullPointerException If {@code delimiter} or {@code elements}

* is {@code null}

* @see java.util.StringJoiner

* @since 1.8

public static String join(CharSequence delimiter, CharSequence... elements) {

Objects.requireNonNull(delimiter);

Objects.requireNonNull(elements);

// Number of elements not likely worth Arrays.stream overhead.

StringJoiner joiner = new StringJoiner(delimiter);

for (CharSequence cs: elements) {

joiner.add(cs);

}

return joiner.toString();

}

/**

* Returns a new {@code String} composed of copies of the

* {@code CharSequence elements} joined together with a copy of the

* specified {@code delimiter}.

* <blockquote>For example,

* <pre>{@code

* List<String> strings = new LinkedList<>();

* strings.add("Java");strings.add("is");

* strings.add("cool");

* String message = String.join(" ", strings);

* //message returned is: "Java is cool"

* Set<String> strings = new LinkedHashSet<>();

* strings.add("Java"); strings.add("is");

* strings.add("very"); strings.add("cool");

* String message = String.join("-", strings);

* //message returned is: "Java-is-very-cool"

* }</pre></blockquote>

* Note that if an individual element is {@code null}, then {@code "null"} is added.

* @param delimiter a sequence of characters that is used to separate each

* of the {@code elements} in the resulting {@code String}

* @param elements an {@code Iterable} that will have its {@code elements}

* joined together.

* @return a new {@code String} that is composed from the {@code elements}

* argument

* @throws NullPointerException If {@code delimiter} or {@code elements}

* is {@code null}

* @see #join(CharSequence,CharSequence...)

* @see java.util.StringJoiner

* @since 1.8

public static String join(CharSequence delimiter,

Iterable<? extends CharSequence> elements) {

Objects.requireNonNull(delimiter);

Objects.requireNonNull(elements);

StringJoiner joiner = new StringJoiner(delimiter);

for (CharSequence cs: elements) {

joiner.add(cs);

}

return joiner.toString();

}

public String trim() {

int len = value.length;

int st = 0;

char[] val = value; /* avoid getfield opcode */

while ((st < len) && (val[st] <= ‘ ‘)) {

st++;

}

while ((st < len) && (val[len - 1] <= ‘ ‘)) {

len--;

}

return ((st > 0) || (len < value.length)) ? substring(st, len) : this;

}

关于串有很多细节是需要注意的，给个小例子：

/**
 * Small timing demo that builds a 100001-character string in three ways
 * ({@code +=}, {@code concat} and {@code StringBuffer.append}) and then
 * times two ways of turning the buffer into a {@code String}. Elapsed
 * wall-clock times in milliseconds and the resulting lengths are printed.
 *
 * @param args unused
 */
public static void main(String[] args) {
    final int rounds = 100000;
    long begin;
    long elapsed;

    // 1) '+=' : every iteration yields a new String object.
    begin = System.currentTimeMillis();
    String plus = "a";
    for (int round = 0; round < rounds; round++) {
        plus += "b";
    }
    elapsed = System.currentTimeMillis() - begin;
    System.out.println(" + 所消耗的时间：" + elapsed + "毫秒");
    System.out.println(plus.length());
    plus = null;

    // 2) concat() : also returns a new String on every call.
    begin = System.currentTimeMillis();
    String viaConcat = "c";
    for (int round = 0; round < rounds; round++) {
        viaConcat = viaConcat.concat("d");
    }
    elapsed = System.currentTimeMillis() - begin;
    System.out.println("concat所消耗的时间：" + elapsed + "毫秒");
    System.out.println(viaConcat.length());
    viaConcat = null;

    // 3) StringBuffer.append() : appends to one mutable buffer.
    begin = System.currentTimeMillis();
    StringBuffer buffer = new StringBuffer("e");
    for (int round = 0; round < rounds; round++) {
        buffer.append("d");
    }
    elapsed = System.currentTimeMillis() - begin;
    System.out.println("append所消耗的时间：" + elapsed + "毫秒");

    // 4) Buffer -> String through the String(StringBuffer) constructor.
    begin = System.currentTimeMillis();
    String viaConstructor = new String(buffer);
    elapsed = System.currentTimeMillis() - begin;
    System.out.println("appnew所消耗的时间：" + elapsed + "毫秒");
    System.out.println(viaConstructor.length());
    viaConstructor = null;

    // 5) Buffer -> String through toString().
    begin = System.currentTimeMillis();
    String viaToString = buffer.toString();
    elapsed = System.currentTimeMillis() - begin;
    System.out.println("apptsr所消耗的时间：" + elapsed + "毫秒");
    System.out.println(viaToString.length());
}

结果

+ 所消耗的时间：11739毫秒

100001

concat所消耗的时间：5008毫秒

100001

append所消耗的时间：1毫秒

appnew所消耗的时间：0毫秒

100001

apptsr所消耗的时间：0毫秒

100001

时间： 2024-10-06 21:06:53

数据结构-串

数据结构-串的相关文章

javascript实现数据结构: 串的块链存储表示

数据结构—串的堆分配

数据结构——串的相关算法实现

数据结构-串操作应用之词索引表

数据结构 - 串的性质和基本操作（一）

数据结构- 串的模式匹配算法：BF和 KMP算法

数据结构串

转载数据结构串的模式匹配

数据结构-串的堆分配存储