2014-04-29 00:20
题目:给定一个长字符串,和一个词典。如果允许你将长串分割成若干个片段,可能会存在某些片段在词典里查不到,有些则查得到。请设计算法进行分词,使得查不到的片段个数最少。
解法:用空间换取时间的动态规划算法,首先用O(n^2)的时间判断每一个片段是否在字典里。这个过程其实可以通过字典树来进行加速,时间上能优化一个阶,不过我没写,偷懒用<unordered_set>代表了字典。之后通过O(n^2)时间的动态规划求解,dp[i]表示前缀data[0..i]中查不到的字符的最少个数(代码以查不到的字符数来衡量查不到的片段)。对于懂代码的人,代码说的比文字清楚,所以请看代码。
代码:
// 17.14 Given a dictionary of words, and a long string. You may find a way to
// cut the string into words, where some of them may or may not be in the
// dictionary. The goal is a cut that leaves as few characters as possible in
// pieces that are not dictionary words.
// Dynamic programming is a good thing, but trades space in for time.
#include <iostream>
#include <string>
#include <unordered_set>
#include <vector>
using namespace std;

// Returns the minimum number of characters of `data` that end up in pieces
// not found in `dict`, over all ways of cutting `data` into consecutive pieces.
// Returns 0 for an empty string.
static int minUnrecognizedChars(const string &data, const unordered_set<string> &dict)
{
    const int n = static_cast<int>(data.length());
    if (n == 0) {
        return 0;
    }

    // contains[i][j] is true when the piece data[i..j] (both ends inclusive)
    // is a dictionary word. Only entries with i <= j are ever filled or read.
    vector<vector<bool> > contains(n, vector<bool>(n, false));
    string piece;
    for (int i = 0; i < n; ++i) {
        piece.clear();
        for (int j = i; j < n; ++j) {
            // Grow the piece one character at a time instead of calling substr().
            piece.push_back(data[j]);
            contains[i][j] = (dict.find(piece) != dict.end());
        }
    }

    // dp[i] is the fewest unrecognized characters in the prefix data[0..i].
    vector<int> dp(n);
    for (int i = 0; i < n; ++i) {
        // Option 1: the whole prefix is a single piece.
        dp[i] = contains[0][i] ? 0 : i + 1;
        // Option 2: the last piece is data[j+1..i]; it costs nothing when it is
        // a word, otherwise all of its (i - j) characters are unrecognized.
        for (int j = 0; j < i; ++j) {
            const int candidate = dp[j] + (contains[j + 1][i] ? 0 : i - j);
            if (candidate < dp[i]) {
                dp[i] = candidate;
            }
        }
    }

    return dp[n - 1];
}

// Reads test cases from standard input until it is exhausted. Each test case
// is: the long string, the number of dictionary words, then the words
// themselves (all whitespace-separated). Prints one answer per line.
int main()
{
    string data;
    while (cin >> data) {
        int wordCount = 0;
        if (!(cin >> wordCount)) {
            // The test case is truncated or malformed: stop instead of
            // printing an answer computed against an empty dictionary.
            break;
        }

        // A fresh dictionary per test case; no manual clearing is needed.
        unordered_set<string> dict;
        string word;
        for (int i = 0; i < wordCount && (cin >> word); ++i) {
            dict.insert(word);
        }

        cout << minUnrecognizedChars(data, dict) << '\n';
    }

    return 0;
}
《Cracking the Coding Interview》——第17章:普通题——题目14,布布扣,bubuko.com
时间: 2024-08-03 08:20:37