tags: trie
You are given a string, s, and a list of words, words, that are all of the same length. Find all starting indices of substring(s) in s that is a concatenation of each word in words exactly once and without any intervening characters.
Example 1: Input: s = "barfoothefoobarman", words = ["foo","bar"] Output: [0,9] Explanation: Substrings starting at index 0 and 9 are "barfoo" and "foobar" respectively. The output order does not matter, returning [9,0] is fine too. Example 2: Input: s = "wordgoodgoodgoodbestword", words = ["word","good","best","word"] Output: []
// 44ms, 84%
class Trie {
Trie *cc[26];
int pos;
public:
Trie() {
pos = -1;
fill_n(cc, 26, nullptr);
}
void insert(const string &s, int position) {
Trie* p = this;
for (auto &c : s) {
int idx = c - 'a';
if (!p->cc[idx]) {
p->cc[idx] = new Trie();
}
p = p->cc[idx];
}
p->pos = position;
}
int search(const string &s, int start, int sz) {
Trie* p = this;
for (int i = start; i < start + sz; i++) {
int idx = s[i] - 'a';
if (!p->cc[idx]) return -1;
p = p->cc[idx];
}
return p->pos;
}
};
class Solution {
public:
vector<int> findSubstring(string s, vector<string>& words) {
vector<int> res;
if (words.empty() || s.empty())
return res;
unordered_map<string,int> mm;
for (auto &w : words) {
mm[w]++;
}
vector<int> flagTmpl(mm.size(), 0);
Trie trie;
int i = 0;
for (auto itt : mm) {
flagTmpl[i] = itt.second;
trie.insert(itt.first, i);
i++;
}
vector<int> dp(s.size(), -2);
const int WD_LEN = words[0].size();
for (int i = 0; i <= (int)s.size()-(int)(WD_LEN*words.size()); i++) {
if (dp[i] == -1) continue;
vector<int> flags(flagTmpl);
int flagCount = 0;
int start = i;
while (flagCount < words.size() && start <= s.size() - WD_LEN) {
int pos = dp[start];
if (pos == -1) break;
if (pos == -2) {
pos = trie.search(s, start, WD_LEN);
}
if (pos < 0) {
dp[start] = -1;
break;
}
if (flags[pos] < 1) {
break;
}
flags[pos]--;
dp[start] = pos;
start += WD_LEN;
++flagCount;
}
if (flagCount == words.size()) {
res.push_back(i);
}
}
return res;
}
};
// 232ms, 51%
class Solution {
public:
bool checkAtIndex(const string& s, int& from, const vector<string>& words, const vector<vector<int>>& wdMap) {
vector<int> usedFlag(words.size(), 0);
int usedCount = 0;
for (; from + words[0].size() <= s.size(); ) {
char c = s[from];
auto& wds = wdMap[c];
if (wds.empty()) return false;
bool foundone = false;
for (int idx : wds) {
if (usedFlag[idx] == 1) continue;
const string& s2 = words[idx];
if (strncmp(s.data() + from, s2.data(), s2.size()) != 0)
continue;
foundone = true;
from += s2.size();
usedFlag[idx] = 1;
usedCount += 1;
if (usedCount == words.size()) {
return true;
}
break;
}
if (!foundone) return false;
}
return false;
}
vector<int> findSubstring(string s, vector<string>& words) {
vector<int> ret;
if (s.empty() || words.empty() ||
(s.size() < words.size()*words[0].size())
) return ret;
int WD_SIZE = words[0].size();
vector<vector<int>> wdMap(256);
for (int i = 0; i < words.size(); i++) {
wdMap[words[i][0]].push_back(i);
}
for (int i = 0; i + WD_SIZE*words.size() <= s.size(); ) {
int from = i;
if (checkAtIndex(s, from, words, wdMap)) {
ret.push_back(i);
}
i++;
}
return ret;
}
};
int main()
{
//string s1(5000, 'a');
//vector<string> ar(5001, 'a');
//string s1 = "barfoofoobarthefoobarman";
//vector<string> ar = {"bar","foo","the"};
//string s1 = "";
//vector<string> ar = {};
string s1 = "aaaaaaaa";
vector<string> ar = {"aa","aa","aa"};
auto r = Solution().findSubstring(s1, ar);
for (auto a : r)
cout << a << " ,";
cout << endl;
}