poj_2774 后缀数组

题目大意

给定两个字符串A,B,求出A和B中最长公共子串的长度。

题目分析

字符串的子串可以认为是字符串的某个后缀的前缀，而求最长公共子串相当于求A和B的某两个后缀的最长公共前缀。可以考虑使用后缀数组，将A和B连接起来，中间添加一个在A和B中都未出现过的字符隔开，然后求这个新串的后缀数组以及height数组。**height数组是后缀Suffix(SA[i])和Suffix(SA[i-1])的公共前缀的最长长度。
容易知道，满足题目要求的两个子串S1,S2所在的后缀在后缀数组中肯定排名相邻（用反证法可以证明）**。这样就可以利用height数组，遍历一遍height数组，要求SA[i]和SA[i-1]分别属于A和B，同时height[i]最大。
求后缀数组，使用倍增算法，时间复杂度O(nlogn)；求height数组，时间复杂度O(n)；遍历height数组，求SA[i]、SA[i-1]属于不同串的height[i]最大值，时间复杂度O(n)。因此总的时间复杂度为 O(nlogn)

注意：
判断SA[i]和SA[i-1]是否属于不同的串，设n为第一个串的长度。如果通过(SA[i] - n)*(SA[i-1]-n) < 0来判断，由于数据过大，int类型的乘积会溢出，因此需要使用long long int类型；下面的实现直接比较SA[i]、SA[i-1]与n的大小关系，避免了乘法。

实现(c++)

#define _CRT_SECURE_NO_WARNINGS
#include<stdio.h>
#include<string.h>
// Size of the alphabet used by the first counting sort in GetSuffixArray.
// GetStr maps 'a'..'z' to 1..26 and appends 0 as terminator; main inserts
// the separator character 'a' + 27, which GetStr maps to 28.
#define LETTERS 30
// Capacity of every work array below and of the input buffer in main.
#define MAX_ARRAY_SIZE 200005
// gSuffixArray[r] = start position of the suffix with sorted rank r.
int gSuffixArray[MAX_ARRAY_SIZE];
// Bucket counters for the counting sorts in GetSuffixArray.
int gCount[MAX_ARRAY_SIZE];
// Suffix start positions ordered by their second sort key; GetSuffixArray
// also swaps this buffer with gRank between doubling rounds.
int gOrderBySecondKey[MAX_ARRAY_SIZE];
// Rank buffer. Because of the buffer swapping in GetSuffixArray it is only
// guaranteed to be the inverse of gSuffixArray after GetHeight rebuilds it.
int gRank[MAX_ARRAY_SIZE];
// gFirstKeyArray[i] = first-key rank of the suffix gOrderBySecondKey[i].
int gFirstKeyArray[MAX_ARRAY_SIZE];
// gHeight[r] = common prefix length of the suffixes ranked r and r - 1
// (filled by GetHeight).
int gHeight[MAX_ARRAY_SIZE];
// Integer-encoded input text produced by GetStr.
int gStr[MAX_ARRAY_SIZE];
// Number of valid entries in gStr, including the trailing 0 terminator.
int gStrLen;
// Returns true when positions a and b carry the same key pair in arr:
// the values at a and b match, and so do the values `step` slots further on.
// The second pair is only read when the first pair already matches.
bool Compare(int* arr, int a, int b, int step){
	if (arr[a] != arr[b]){
		return false;
	}
	const int next_a = a + step;
	const int next_b = b + step;
	return arr[next_a] == arr[next_b];
}

// Encodes the NUL-terminated text `str` into the global integer array gStr.
// Each character c becomes c - 'a' + 1, so 'a' maps to 1 and the value 0 is
// never produced by a lowercase letter. A single 0 is appended as a
// terminator that is smaller than every encoded character, and gStrLen is
// set to the text length plus one so that it counts this terminator.
void GetStr(char* str){
	memset(gStr, 0, sizeof(gStr));
	gStrLen = (int)strlen(str);
	for (int i = 0; i < gStrLen; i++){
		// Plain ASCII apostrophes: the typographic quotes in the original
		// are not valid character-literal delimiters.
		gStr[i] = str[i] - 'a' + 1;
	}
	gStr[gStrLen] = 0;
	gStrLen++;
}

// Builds gSuffixArray for the first gStrLen entries of gStr using prefix
// doubling: suffixes are first sorted by their first symbol, then repeatedly
// by the pair (rank of first `step` symbols, rank of the following `step`
// symbols), with `step` doubling each round until it reaches n.
// Every round is two passes of a stable counting sort, so a round costs
// O(n + m) where m is the current number of distinct ranks.
// Uses gCount, gRank, gOrderBySecondKey and gFirstKeyArray as scratch space;
// because the rank buffers are swapped each round, the final ranks may end
// up in either gRank or gOrderBySecondKey.
void GetSuffixArray(){
	int n = gStrLen;
	memset(gCount, 0, sizeof(gCount));
	// Round 0: the rank of a suffix is simply its first symbol.
	for (int i = 0; i < n; i++){
		gRank[i] = gStr[i];
		gCount[gRank[i]] ++;
	}
	int m = LETTERS;
	// Prefix sums turn the per-symbol counts into bucket end positions.
	for (int i = 1; i < m; i++){
		gCount[i] += gCount[i - 1];
	}
	// Filling from the back keeps the counting sort stable.
	for (int i = n - 1; i >= 0; i--){
		gSuffixArray[--gCount[gRank[i]]] = i;
	}

	int step = 1;
	int *rank = gRank, *order_by_second_key = gOrderBySecondKey;
	while (step < n){
		int p = 0;

		// Suffixes starting in the last `step` positions have no second
		// half, so they come first in second-key order.
		for (int i = n - step; i < n; i++){
			order_by_second_key[p++] = i;
		}
		// The remaining suffixes inherit their second-key order from the
		// current suffix array: suffix (SA[i] - step) has suffix SA[i] as
		// its second half.
		for (int i = 0; i < n; i++){
			if (gSuffixArray[i] >= step){
				order_by_second_key[p++] = gSuffixArray[i] - step;
			}
		}
		// First key of each suffix, listed in second-key order.
		for (int i = 0; i < n; i++){
			gFirstKeyArray[i] = rank[order_by_second_key[i]];
		}
		// Stable counting sort on the first key; ties keep second-key order,
		// which yields the order by (first key, second key).
		for (int i = 0; i < m; i++){
			gCount[i] = 0;
		}
		for (int i = 0; i < n; i++){
			gCount[gFirstKeyArray[i]] ++;
		}
		for (int i = 1; i < m; i++){
			gCount[i] += gCount[i - 1];
		}
		for (int i = n - 1; i >= 0; i--){
			gSuffixArray[--gCount[gFirstKeyArray[i]]] = order_by_second_key[i];
		}
		// Swap buffers: order_by_second_key now holds the OLD ranks, and
		// `rank` is the buffer that receives the new ranks.
		int* tmp = rank; rank = order_by_second_key; order_by_second_key = tmp;
		rank[gSuffixArray[0]] = p = 0;
		// Walk the new order; neighbours with equal old key pairs share a
		// rank, otherwise the rank counter advances.
		for (int i = 1; i < n; i++){
			if (Compare(order_by_second_key, gSuffixArray[i], gSuffixArray[i - 1], step)){
				rank[gSuffixArray[i]] = p;
			}
			else{
				rank[gSuffixArray[i]] = ++p;
			}
		}
		// Number of distinct ranks = bucket count for the next round.
		m = p + 1;
		step *= 2;
	}
}
// Fills gHeight so that gHeight[r] is the length of the longest common
// prefix of the suffixes ranked r and r - 1 in gSuffixArray (r >= 1), and
// gHeight[0] is 0. Also rebuilds gRank as the inverse of gSuffixArray.
//
// Suffixes are visited in text order; the match length k found for the
// suffix at i is reused (minus one) as the starting point for the suffix at
// i + 1, so the total number of character comparisons stays linear.
void GetHeight(){
	int n = gStrLen;
	for (int i = 0; i < n; i++){
		gRank[gSuffixArray[i]] = i;
	}
	int k = 0;
	for (int i = 0; i < n; i++){
		int r = gRank[i];
		if (r == 0){
			// The smallest suffix has no predecessor. The original code read
			// gSuffixArray[-1] here, which is out of bounds.
			gHeight[0] = 0;
			k = 0;
			continue;
		}
		if (k){
			k--;
		}
		int j = gSuffixArray[r - 1];
		while (j + k < n && i + k < n && gStr[i + k] == gStr[j + k]){
			k++;
		}
		gHeight[r] = k;
	}
}

// Input buffer: first string, one separator character, second string.
char str[MAX_ARRAY_SIZE];

// Reads two whitespace-delimited strings from stdin and prints the length of
// their longest common substring.
// The strings are joined with a separator, the suffix array and height array
// of the joined text are built, and the answer is the largest gHeight[i]
// whose two neighbouring suffixes start on opposite sides of the separator.
int main(){
	scanf("%s", str);
	int n = strlen(str);
	// Separator: 'a' + 27 is encoded as 28 by GetStr, outside the 1..26
	// range of lowercase letters. Plain ASCII apostrophes replace the
	// typographic quotes of the original, which do not compile.
	str[n] = 'a' + 27;
	scanf("%s", str + n + 1);
	GetStr(str);
	GetSuffixArray();
	GetHeight();
	int max = 0;
	for (int i = 1; i < gStrLen; i++){
		if (gHeight[i] > max){
			// Position n is the separator itself: starts below n belong to
			// the first string, starts above n to the second.
			if ((gSuffixArray[i] > n && gSuffixArray[i-1] < n) || (gSuffixArray[i - 1] > n && gSuffixArray[i] < n)){
				max = gHeight[i];
			}
		}
	}
	printf("%d\n", max);
	return 0;
}

时间： 2024-12-19 04:05:38

poj_2774 后缀数组

题目大意

题目分析

实现(c++)

poj_2774 后缀数组的相关文章

SPOJ 705 Distinct Substrings（后缀数组）

hdu5769--Substring（后缀数组）

hdu 3518 Boring counting 后缀数组LCP

【tyvj1860】后缀数组

BZOJ 3238 AHOI 2013 差异后缀数组+单调栈

hdu 5030 Rabbit's String(后缀数组&二分)

hdu 4416 Good Article Good sentence(后缀数组&思维)

uva 10829 - L-Gap Substrings(后缀数组)

poj 3693 Maximum repetition substring(后缀数组)