POJ 2050 Searching the Web

题意简述:做一个极其简单的搜索系统,对以下四种输入进行分析与搜索:

    1. 只有一个单词:如 term, 只需找到含有这个单词的document,然后把这个document的含有这个单词term的那些行输出。

    2.term1 AND term2, 找到同时含有term1 和 term2 的document,然后把这个document的含有这个单词term1 或 term2 的那些行输出。

    3.term1 OR term2, 找到含有term1 或 term2 的document,然后把这个document的含有这个单词term1 或 term2 的那些行输出。

    4.NOT term,将不含有 term的document全部输出

思路简述:

    为每个单词建立一个set集合 Src,记录该单词出现在哪些文件的哪些行;再用一个map做映射,指出每个单词出现在哪些文件中。这样,如果查询包含两个单词,可以分别求出各个单词所在的文件集合:遇到“AND”就对两个集合做set_intersection运算,遇到“OR”就做set_union运算。

/*
	Poj 2050
	Emerald
	10 May 2015
*/
#include <iostream>
#include <cstring>
#include <cstdio>
#include <cctype>
#include <sstream>
#include <string>
#include <vector>
#include <map>
#include <set>
#include <algorithm>

using namespace std;

// Position of one word occurrence: line number lineOrder inside document
// number docOrder.
class Figure{
public:
	int docOrder, lineOrder;
	// A default-constructed position is zeroed so that no member can ever be
	// read while uninitialized.
	Figure() : docOrder( 0 ), lineOrder( 0 ) {}
	// d is the document index, l is the line index inside that document.
	Figure( int d, int l ) : docOrder( d ), lineOrder( l ) {}
};

// Ordering used by the set/map containers keyed on Figure: positions are
// compared by document index first, and by line index when the documents match.
bool operator < ( const Figure f1, const Figure f2 ) {
	if( f1.docOrder == f2.docOrder ) {
		return f1.lineOrder < f2.lineOrder;
	}
	return f1.docOrder < f2.docOrder;
}

// Summary of one input document: its index (docOrder) and the number of lines
// it contains (lineLimit).
class Doc{
public :
	int docOrder, lineLimit;
	// A default-constructed summary is zeroed so that no member can ever be
	// read while uninitialized.
	Doc() : docOrder( 0 ), lineLimit( 0 ) {}
	// d is the document index, l is the number of lines in that document.
	Doc( int d, int l ) : docOrder( d ), lineLimit( l ) {}
};

// Src is a type, not an object: the set of (document, line) positions at which
// one particular word occurs.
typedef set < Figure > Src;
// docSrc: the set of document indices in which one particular word occurs.
typedef set < int > docSrc;
map < string, docSrc > docMap; // word -> indices of the documents containing it
map < string, Src > dict; // word -> every (document, line) position of the word
map < Figure, string > wordLine; // (document, line) -> text of that line as it was read
vector < Doc > docs; // one entry per document: its index and its line count

const string DOC_END = "**********"; // input line that terminates a document
// Iterator-pair and insert-iterator shorthands for the set algorithms.
// Arguments are parenthesized so that any expression can be passed safely.
#define ALL(x) (x).begin(),(x).end()
#define INS(x) inserter((x),(x).begin())

// Normalizes line in place: letters become lower case, every other character
// becomes a space.
void Standard( string &line );
// Indexes every whitespace-separated word of an already normalized line as
// occurring at line lineOrder of document docOrder (updates dict and docMap).
void WordRecord( string &line, int docOrder, int lineOrder );
// Prints the stored text of every position in src, with a dashed separator
// between different documents and a closing "==========" line; prints an
// apology line instead when src is empty.
void PrintSrc( Src &src );
// Adds to src every position of w1 and of w2 whose document is listed in dsrc.
void WordsToFigure( Src &src, docSrc &dsrc, string& w1, string& w2 );
// Adds to src every line of every document that does not contain word.
void NotWord( string& word, Src &src );

int main() {
	int allDocs, queries;

	// input docs
	scanf( "%d", &allDocs );
	cin.get();
	for( int i=0; i<allDocs; i++ ) {
		string line;
		int lineCounter = 0;
		while( getline( cin, line ) && line != DOC_END ) { // read until end
			wordLine[ Figure( i, lineCounter ) ] = line;
			Standard( line );
			WordRecord( line, i, lineCounter ++ );
		}
		docs.push_back( Doc( i, lineCounter ) );
	}

	// input queries
	string command;
	scanf( "%d", &queries );
	cin.get();
	while( queries -- ) {
		getline( cin, command );
		Standard( command );
		Src src;
		if( command.find_last_of( ‘ ‘ ) == string::npos ) { // no whitespace
			Standard( command );
			src = dict[ command ];
		} else if( command.find_last_of( ‘ ‘ ) != command.find_first_of( ‘ ‘ ) ) { // if there‘re two different whitespaces
			stringstream ss( command );                                          // xxx AND/OR xxx
			string w1, w2, connected;
			ss >> w1 >> connected >> w2;
			docSrc dSrc1 = docMap[ w1 ];
			docSrc dSrc2 = docMap[ w2 ];

			docSrc dsrc;
			if( connected == "and" ) {
				set_intersection( ALL( dSrc1 ), ALL( dSrc2 ), INS( dsrc ) ); // intersection
			} else {
				set_union( ALL( dSrc1 ), ALL( dSrc2 ), INS( dsrc ) ); // union
			}

			WordsToFigure( src, dsrc, w1, w2 );
		} else { // only one whitespace -> Not xxx
			stringstream ss( command );
			string w1;
			ss >> w1 >> w1;
			NotWord( w1, src );
		}
		PrintSrc( src );
	}
	return 0;
}

// Normalizes line in place so that it can be split into words on whitespace:
// every alphabetic character is converted to lower case and every other
// character is replaced by a single space. The length of line is unchanged.
void Standard( string &line ) {
	for( string::size_type i = 0; i < line.size(); ++i ) {
		// The <cctype> functions require a value representable as
		// unsigned char; a plain char may be negative.
		const unsigned char ch = static_cast< unsigned char >( line[i] );
		if( isalpha( ch ) ) {
			line[i] = static_cast< char >( tolower( ch ) );
		} else {
			line[i] = ' ';
		}
	}
}

void WordRecord( string &line, int docOrder, int lineOrder ) {
	stringstream ss( line );
	string word;
	while( ss >> word ) {
		if( dict.count( word ) ) { // whether the word has been found in the total input
			if( !dict[word].count( Figure( docOrder, lineOrder ) ) ) { // whether the word has been found in this line
				dict[word].insert( Figure( docOrder, lineOrder ) ) ;
			}
		} else {
			Src src;
			src.insert( Figure( docOrder, lineOrder ) );
			dict[ word ] = src;
		}
		if( docMap.count( word ) ) { // whether the word has been found in this document
			docMap[word].insert( docOrder );
		} else {
			docSrc ds;
			docMap[word] = ds;
			docMap[word].insert( docOrder );
		}
	}
}

// Prints the answer to one query. For an empty src an apology line is printed.
// Otherwise the stored text of every position in src is printed in set order,
// with a "----------" line wherever the document index changes. In both cases
// the output ends with a "==========" line.
void PrintSrc( Src &src ) {
	if( src.empty() ) {
		printf("Sorry, I found nothing.\n");
		printf( "==========\n" );
		return;
	}
	int lastDoc = src.begin()->docOrder; // document of the most recently printed line
	for( Src::iterator cur = src.begin(); cur != src.end(); ++cur ) {
		if( cur->docOrder != lastDoc ) {
			printf( "----------\n" );
			lastDoc = cur->docOrder;
		}
		printf( "%s\n", wordLine[ *cur ].c_str() );
	}
	printf( "==========\n" );
}

void WordsToFigure( Src &src, docSrc &dsrc, string& w1, string& w2 ) {
	docSrc :: iterator it;
	for( it=dsrc.begin(); it != dsrc.end(); it ++ ) {
		Src :: iterator it2 ;
		for( it2 = dict[ w1 ].begin(); it2 !=dict[w1].end(); it2 ++ ) {
			if( *it == it2 -> docOrder ) {
				src.insert( *it2 ); // the w1 appears in this line of this document
			}
		}
		for( it2 = dict[ w2 ].begin(); it2 !=dict[w2].end(); it2 ++ ) {
			if( *it == it2 -> docOrder ) {
				src.insert( *it2 );	// the w1 appears in this line of this document
			}
		}
	}
}

void NotWord( string& word, Src &src ) { // not this word
	docSrc dsrc = docMap[ word ];
	vector< Doc > :: iterator it;
	for( it = docs.begin(); it != docs.end(); it ++ ) {
		if( !dsrc.count( it->docOrder ) ) {
			for( int i=0; i< it->lineLimit; i ++ ) {
				src.insert( Figure( it->docOrder, i ) );
			}
		}
	}
}
时间: 2024-10-11 20:23:37

POJ 2050 Searching the Web的相关文章

uva 1597 Searching the Web

The word "search engine" may not be strange to you. Generally speaking, a search engine searches the web pages available in the Internet, extracts and organizes the information and responds to users' queries with the most relevant pages. World f

Searching the Web UVA - 1597

链接:https://vjudge.net/problem/UVA-1597#author=0 这题写了我一个晚上,然后debug了一个早上.. 最主要就是AND那一部分,一开始用了一个很奇怪的方法实现,就是利用set递增的性质,设置一个cur变量保存现在遍历到的文章下标的最大值,然后检查s1和s2能否取到,cur每次取当前s1和s2的文章下标最大值.中间实现的时候也出了点bug,没有在遍历到末尾的时候跳出循环.然而这不是重点..重点在于cur不一定取到,也就是可以跳过cur取一个更大的值, 这

POJ题目分类推荐 (很好很有层次感)

著名题单,最初来源不详.直接来源:http://blog.csdn.net/a1dark/article/details/11714009 OJ上的一些水题(可用来练手和增加自信) (POJ 3299,POJ 2159,POJ 2739,POJ 1083,POJ 2262,POJ 1503,POJ 3006,POJ 2255,POJ 3094) 初期: 一.基本算法: 枚举. (POJ 1753,POJ 2965) 贪心(POJ 1328,POJ 2109,POJ 2586) 递归和分治法. 递

POJ 刷题指南

OJ上的一些水题(可用来练手和增加自信) (POJ 3299,POJ 2159,POJ 2739,POJ 1083,POJ 2262,POJ 1503,POJ 3006,POJ 2255,POJ 3094) 初期: 一.基本算法: 枚举. (POJ 1753,POJ 2965) 贪心(POJ 1328,POJ 2109,POJ 2586) 递归和分治法. 递推. 构造法.(POJ 3295) 模拟法.(POJ 1068,POJ 2632,POJ 1573,POJ 2993,POJ 2996) 二

pagerank算法

转自 http://blog.csdn.net/hguisu/article/details/7996185 1. PageRank算法概述 PageRank,即网页排名,又称网页级别.Google左侧排名或佩奇排名. 是Google创始人拉里·佩奇和谢尔盖·布林于1997年构建早期的搜索系统原型时提出的链接分析算法,自从Google在商业上获得空前的成功后,该算法也成为其他搜索引擎和学术界十分关注的计算模型.目前很多重要的链接分析算法都是在PageRank算法基础上衍生出来的.PageRank

How To Ask Questions The Smart Way

How To Ask Questions The Smart Way Eric Steven Raymond Thyrsus Enterprises <[email protected]> Rick Moen <[email protected]> Copyright © 2001,2006,2014 Eric S. Raymond, Rick Moen Revision History Revision 3.10 21 May 2014 esr New section on St

Hybris Qualifications

Required Qualifications: -years' experience with enterprise software projects including Java development -Full Project Lifecycle experience, including requirements gathering, development, configuration and operation -Experienced in business process a

Building simple plug-ins system for ASP.NET Core(转)

Recently I built plug-ins support to my TemperatureStation IoT solution web site. The code for .NET Core is different from what we have seen on full .NET Framework (application domains etc) but there’s still nothing complex. This blog post describes

zz A list of open source C++ libraries

A list of open source C++ libraries < cpp‎ | links http://en.cppreference.com/w/cpp/links/libs The objective of this page is to build a comprehensive list of open source C++ libraries, so that when one needs an implementation of particular functional