Constraints
- tokens will contain between 0 and 50 elements
inclusive
- Each element of tokens will have length between 1 and 50
inclusive
- Each element of tokens will only consist of letters
(A-Z,a-z)
- input will have length between 0 and 50 inclusive
- input
will only consist of letters (A-Z,a-z)
Examples
0)
{"ab","aba","A"}
"ababbbaAab"
Returns: { "aba",
"A",
"ab" }
Same as above
1)
{"a","a","aa","aaa","aaaa","aaaaa","aa"}
"aaaaaaaaaaaaaaaaaaaaaaaaa"
Returns:
{ "aaaaa",
"aaaaa",
"aaaaa",
"aaaaa",
"aaaaa" }
Make sure
to use the longest valid starting token
2)
{"wow","wo","w"}
"awofwwofowwowowowwwooo"
Returns: { "wo",
"w",
"wo",
"w",
"wow",
"wow",
"w",
"wo" }
3)
{"int","double","long","char","boolean","byte","float"}
"intlongdoublecharintintboolean"
Returns:
{ "int",
"long",
"double",
"char",
"int",
"int",
"boolean" }
4)
{}
"Great"
Returns: { }
No valid tokens, so nothing is
CONSUMED
5)
{"AbCd","dEfG","GhIj"}
"abCdEfGhIjAbCdEfGhIj"
Returns: {
"dEfG",
"AbCd",
"GhIj" }
Remember CASE-SENSITIVITY
This problem statement is the exclusive and proprietary property of
TopCoder, Inc.
Any unauthorized use or reproduction of this information
without the prior written consent of TopCoder, Inc. is strictly prohibited.
(c)2003, TopCoder, Inc. All rights reserved.
*/
/*
My Policy:
1 对token进行排序,以长度作为依据,排成降序
2 对输入数据 循环查找tokens,
从长到短;如果找到就进行输出;
3 跳过找到的串,对后续的数据在应用相同的原则,继续进行处理;
Class: Lexer
Method: tokenize
Parameters: vector <string>,
string
Returns: vector <string>
Method signature: vector
<string> tokenize(vector <string> tokens, string input)
(be sure
your method is public)
*/
#pragma warning ( disable: 4786)
#include <string>
#include
<vector>
#include <algorithm>
#include <iostream>
using namespace std;
class Lexer {
public:
static compare(const string &a, const
string &b) {
return a.length() > b.length();
}
vector
<string> tokenize(vector <string> tokens, string input)
{
vector<string> result;
if ( !tokens.size() )
{
std::cerr << "No valid tokens, so nothing is CONSUMED" <<
std::endl;
return result;
}
if ( input.length() == 0 )
{
std::cerr << "Input data is emtpy, Needn't process" <<
std::endl;
return result;
}
// 需要排序, 并且需要按照字符串的长度以减序进行排序,
// 这样下面就不需要进行特别的处理;
// 否这,
在出现长短两个串在同一个起始位置都匹配的时候,需要进行优先判断,
// 以串长的为优先
sort(tokens.begin(),tokens.end(),Lexer::compare);
const char
*p = input.c_str();
size_t pos=0;
size_t curpos=0;
size_t minpos=0;
int minidx=0;
int len=input.length();
const char *pp=p;
while(*p) {
minpos=len;
for(int i=0; i<tokens.size(); ++i
) {
curpos=input.find(tokens[i],pos);
if ( curpos != string::npos
) {
if ( curpos < minpos ) {
minpos =
curpos;
minidx = i;
}
/*
//
如果前面不对tokens按照长度进行排序的话, 需要下面的处理
else if ( curpos == minpos ) {
if ( tokens[i].length() > tokens[minidx].length() ) minidx =
i;
}
*/
}
}
if ( minpos == len )
break;
p =pp + minpos + tokens[minidx].length();
pos =minpos +
tokens[minidx].length();
result.push_back(tokens[minidx]);
}
return
result;
}
};
int main(int argc, char* argv[])
{
Lexer
lex;
vector<string> result;
// Sample 0)
vector<string>
tokens;
char *tk0[] = {"ab","aba","A"};
string
str0="ababbbaAabC";
copy(tk0+0,tk0+3,std::back_inserter(tokens) );
std::cout << "input string\n";
std::cout << str0
<< std::endl;
std::cout << "tokens:
\n";
copy(tokens.begin(),tokens.end(), ostream_iterator<string>(cout,
" "));
std::cout << std::endl;
result = lex.tokenize(tokens,str0);
copy(result.begin(), result.end() ,ostream_iterator<string>(cout,
"\n"));
tokens.clear();
//-------------------------------------------------------------------------------//
// Sample 1)
char *tk1[]={"a","a","aa","aaa","aaaa","aaaaa","aa"};
string
str1="aaaaaaaaaaaaaaaaaaaaaaaaa";
copy(tk1,tk1+7,std::back_inserter(tokens) );
std::cout << "input
string\n";
std::cout << str1 << std::endl;
std::cout
<< "tokens: \n";
copy(tokens.begin(),tokens.end(),
ostream_iterator<string>(cout, " "));
std::cout <<
std::endl;
result = lex.tokenize(tokens,str1);
copy(result.begin(), result.end() ,ostream_iterator<string>(cout,
"\n"));
tokens.clear();
//-------------------------------------------------------------------------------//
// Sample 2)
char *tk2[]={"wow","wo","w"};
string
str2="awofwwofowwowowowwwooo";
copy(tk2,tk2+3,std::back_inserter(tokens) );
std::cout <<
"input string\n";
std::cout << str2 << std::endl;
std::cout
<< "tokens: \n";
copy(tokens.begin(),tokens.end(),
ostream_iterator<string>(cout, " "));
std::cout <<
std::endl;
result = lex.tokenize(tokens,str2);
copy(result.begin(), result.end() ,ostream_iterator<string>(cout,
"\n"));
tokens.clear();
//-------------------------------------------------------------------------------//
//
Sample 3)
char
*tk3[]={"int","double","long","char","boolean","byte","float"};
string
str3="intlongdoublecharintintboolean";
copy(tk3,tk3+7,std::back_inserter(tokens) );
std::cout <<
"input string\n";
std::cout << str3 << std::endl;
std::cout
<< "tokens: \n";
copy(tokens.begin(),tokens.end(),
ostream_iterator<string>(cout, " "));
std::cout <<
std::endl;
result = lex.tokenize(tokens,str3);
copy(result.begin(), result.end() ,ostream_iterator<string>(cout,
"\n"));
tokens.clear();
//-------------------------------------------------------------------------------//
//
Sample 4)
// char *tk4[]={};
string str4="Great";
// copy(tk4,tk4+0,std::back_inserter(tokens) );
std::cout <<
"input string\n";
std::cout << str4 << std::endl;
std::cout
<< "tokens: \n";
copy(tokens.begin(),tokens.end(),
ostream_iterator<string>(cout, " "));
std::cout <<
std::endl;
result = lex.tokenize(tokens,str4);
copy(result.begin(), result.end() ,ostream_iterator<string>(cout,
"\n"));
tokens.clear();
//-------------------------------------------------------------------------------//
//
Sample 5
char *tk5[]={"AbCd","dEfG","GhIj"};
string
str5="abCdEfGhIjAbCdEfGhIj";
copy(tk5,tk5+3,std::back_inserter(tokens) );
std::cout <<
"input string\n";
std::cout << str5 << std::endl;
std::cout
<< "tokens: \n";
copy(tokens.begin(),tokens.end(),
ostream_iterator<string>(cout, " "));
std::cout <<
std::endl;
result = lex.tokenize(tokens,str5);
copy(result.begin(), result.end() ,ostream_iterator<string>(cout,
"\n"));
tokens.clear();
//-------------------------------------------------------------------------------//
return
0;
}