-
Notifications
You must be signed in to change notification settings - Fork 14
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
【Day 75 】2021-11-23 - 面试题 17.17 多次搜索 #94
Comments
思路可以用 Trie树来做。 具体步骤有参考suukii的仓库, 代码完全是自己写的~ 方法: Trie树把短字符串存到 Trie 中,保证 Trie 的高度尽量的小。
Trie 的操作:
代码实现语言: C++ class Solution {
private:
struct Node {
int idx;
vector<Node*> children;
Node() : idx(-1), children(26, nullptr) {}
};
struct Trie {
Node* root;
Trie() : root(new Node()) {}
void insert(string& word, int idx)
{
Node* p = root;
for (char& c : word) {
c -= 'a';
if (p->children[c] == nullptr)
{
p->children[c] = new Node();
}
p = p->children[c];
}
p->idx = idx;
}
};
public:
vector<vector<int>> multiSearch(string big, vector<string>& smalls) {
unordered_map<string, vector<int>> cache;
const int n = big.length();
const int m = smalls.size();
vector<vector<int>> res(m);
Trie trie = Trie();
// 构造前缀树
for (int i = 0; i < m; i++)
{
trie.insert(smalls[i], i);
}
for (int i = 0; i < n; i++)
{
int j = i;
Node* node = trie.root;
while (j < n && node->children[big[j] - 'a'])
{
node = node->children[big[j] - 'a'];
if (node->idx != -1)
{
res[node->idx].push_back(i);
}
j++;
}
}
return res;
}
}; 复杂度分析
|
class Trie:
def __init__(self, words):
self.d = {}
for word in words:
t = self.d
for w in word:
if w not in t:
t[w] = {}
t = t[w]
t['end'] = word
def search(self, s):
t = self.d
res = []
for w in s:
if w not in t:
break
t = t[w]
if 'end' in t:
res.append(t['end'])
return res
class Solution:
def multiSearch(self, big: str, smalls: List[str]) -> List[List[int]]:
trie = Trie(smalls)
hit = collections.defaultdict(list)
for i in range(len(big)):
matchs = trie.search(big[i:])
for word in matchs:
hit[word].append(i)
res = []
for word in smalls:
res.append(hit[word])
return res |
代码
C++ Code: struct triNode{
vector<triNode*> children;
bool end;
int idx;
triNode()
{
end = false;
idx = 0;
children.assign(26,NULL);
}
};
class Solution {
public:
vector<vector<int>> multiSearch(string big, vector<string>& smalls) {
triNode* root = new triNode();
for(int i=0;i<smalls.size();i++)
{
triNode* temp = root;
for(int j=0;j<smalls[i].size();j++)
{
int idx = smalls[i][j] - 'a';
if(!temp->children[idx])
temp->children[idx] = new triNode();
temp = temp->children[idx];
}
temp->end = true;
temp->idx = i;
}
vector<vector<int>> res(smalls.size(),vector<int>(0,{}));
for(int i=0;i<big.size();i++)
{
triNode* temp = root;
int j = i;
while(temp&&j<big.size())
{
int idx = big[j] - 'a';
if(temp->children[idx])
{
temp = temp->children[idx];
if(temp->end)
{
res[temp->idx].push_back(i);
}
j++;
}
else
break;
}
}
return res;
}
};
|
class Trie:
def __init__(self, words):
self.d = {}
for word in words:
t = self.d
for w in word:
if w not in t:
t[w] = {}
t = t[w]
t['end'] = word
def search(self, s):
t = self.d
res = []
for w in s:
if w not in t:
break
t = t[w]
if 'end' in t:
res.append(t['end'])
return res
class Solution:
def multiSearch(self, big: str, smalls: List[str]) -> List[List[int]]:
trie = Trie(smalls)
hit = collections.defaultdict(list)
for i in range(len(big)):
matchs = trie.search(big[i:])
for word in matchs:
hit[word].append(i)
res = []
for word in smalls:
res.append(hit[word])
return res |
思路把所有短字符 smalls 放入Trie
Java Codepublic class MultiSearch {
public int[][] multiSearch(String big, String[] smalls) {
Trie trie = new Trie();
for (String word : smalls) {
trie.insert(word);
}
Map<String, List<Integer>> indexMap = new HashMap<>();
for (int i = 0; i < big.length(); i++) {
String word = big.substring(i);
List<String> matches = trie.search(word);
for (String match : matches) {
indexMap.computeIfAbsent(match, x -> new ArrayList()).add(i);
}
}
int[][] res = new int[smalls.length][];
for (int i = 0; i < smalls.length; i++) {
List<Integer> indexList = indexMap.get(smalls[i]);
int[] indexArr = new int[indexList == null ? 0 : indexList.size()];
for (int j = 0; j < indexArr.length; j++) {
indexArr[j] = indexList.get(j);
}
res[i] = indexArr;
}
return res;
}
private static class Trie {
TrieNode root = new TrieNode();
public List<String> search(String str) {
List<String> result = new ArrayList<>();
TrieNode p = root;
for (char c : str.toCharArray()) {
if (p.children.get(c) == null) {
break;
}
p = p.children.get(c);
if (p.word != null) {
result.add(p.word);
}
}
return result;
}
public void insert(String str) {
TrieNode p = root;
for (char c : str.toCharArray()) {
if (p.children.get(c) == null) {
p.children.put(c, new TrieNode());
}
p = p.children.get(c);
}
p.word = str;
}
private static class TrieNode {
String word = null;
Map<Character, TrieNode> children = new HashMap<>();
}
}
} |
|
class Trie:
def __init__(self, words):
self.d = {}
for word in words:
t = self.d
for w in word:
if w not in t:
t[w] = {}
t = t[w]
t['end'] = word
def search(self, s):
t = self.d
res = []
for w in s:
if w not in t:
break
t = t[w]
if 'end' in t:
res.append(t['end'])
return res
class Solution:
def search(big, small):
trie = Trie(small)
check = {}
for i in range(len(big)):
match = trie.search(big[i:])
for word in match:
check[word].append(i)
res = []
for word in small:
res.append(check[word])
return res |
class Trie:
def __init__(self, words):
self.d = {}
for word in words:
t = self.d
for w in word:
if w not in t:
t[w] = {}
t = t[w]
t['end'] = word
def search(self, s):
t = self.d
res = []
for w in s:
if w not in t:
break
t = t[w]
if 'end' in t:
res.append(t['end'])
return res
class Solution:
def search(big, small):
trie = Trie(small)
check = {}
for i in range(len(big)):
match = trie.search(big[i:])
for word in match:
check[word].append(i)
res = []
for word in small:
res.append(check[word])
return res |
Rolling hash and Trie. class TrieNode:
def __init__(self):
self.children = defaultdict(TrieNode)
self.eow = False
self.word = None
class Trie:
def __init__(self):
self.root = TrieNode()
def insert(self, word: str) -> None:
node = self.root
for c in word:
node = node.children[c]
node.eow = True
node.word = word
def findWords(self, prefix):
ret = []
node = self.root
for c in prefix:
if c not in node.children:
break
node = node.children[c]
if node.eow:
ret.append(node.word)
return ret
class Solution:
def multiSearch(self, big: str, smalls: List[str]) -> List[List[int]]:
trie = Trie()
smallidx = {}
for i, s in enumerate(smalls):
trie.insert(s)
smallidx[s] = i
ret = [[] for _ in range(len(smalls))]
for i in range(len(big)):
words = trie.findWords(big[i:])
for s in words:
if s not in smallidx: continue
ret[smallidx[s]].append(i)
return ret
def multiSearchRollingHash(self, big: str, smalls: List[str]) -> List[List[int]]:
if not big: return [[] for _ in range(len(smalls))]
def c2n(c):
return ord(c) - ord('a')
def get_hash(s, size):
base = 331
mod = 99999199999
hash = 0
hashset = defaultdict(list)
for i in range(size):
n = c2n(s[i])
hash = hash*base + n
hash = hash%mod
mag = pow(base, size-1, mod)
hashset[hash].append(0)
for i in range(size, len(s)):
left = i - size
hash = ((hash - mag*c2n(s[left]))*base + c2n(s[i]))%mod
hashset[hash].append(i-size+1)
return hashset
ls = set([len(s) for s in smalls])
hash_by_length = defaultdict(list)
for l in ls:
if l <= len(big):
hash_by_length[l]=get_hash(big, l)
ret = []
for s in smalls:
if len(s) == 0 or len(s) > len(big):
ret.append([])
continue
hash = list(get_hash(s, len(s)).keys())[0]
bighash = hash_by_length[len(s)]
locs = bighash[hash]
ret.append(locs)
return ret |
思路
关键点代码
C++ Code: struct triNode
{
vector<triNode*> child;
int count;
triNode()
{
child.assign(26, NULL);
count=-1;
}
};
class Solution {
public:
triNode* pRoot;
vector<vector<int>> multiSearch(string big, vector<string>& smalls) {
pRoot = new triNode();
for(int i=0; i< smalls.size(); i++)
{
triNode* pCurrent = pRoot;
for(int j=0; j< smalls[i].size(); j++)
{
int index = smalls[i][j] - 'a';
if(pCurrent->child[index]==NULL)
{
pCurrent->child[index] = new triNode();
}
pCurrent = pCurrent->child[index];
}
pCurrent->count = i;
}
vector<vector<int>> ret(smalls.size());
for(int i=0; i< big.size(); i++)
{
dfs(big, i, ret, pRoot, smalls);
}
return ret;
}
void dfs(string& big, int start, vector<vector<int>>& ret, triNode* root, vector<string>& smalls)
{
if(start>=big.size())
return ;
int index = big[start]- 'a';
if(root->child[index])
{
if(root->child[index]->count>=0)
{
//cout<< "index:" << index << "start:" << start << "count" << root->child[index]->count << "\n";
ret[root->child[index]->count].push_back(start - smalls[root->child[index]->count].size()+1);
}
dfs(big, start+1, ret, root->child[index], smalls);
}
}
};
|
思路前缀树。 代码(C++)struct TrieNode{
int sid;
TrieNode *child[26];
TrieNode(){
sid=-1;
for(int i=0;i<26;++i) child[i]=NULL;
}
};
class Solution {
private:
TrieNode *root=new TrieNode();
public:
void insert(string word,int s){
int n=word.size();
TrieNode *cur=root;
for(int i=0;i<n;++i){
int cid=word.at(i)-'a';
if(cur->child[cid]==NULL) cur->child[cid]=new TrieNode();
cur=cur->child[cid];
}
cur->sid=s;
}
void search(string word,vector<vector<int>>& ans,int bid){
int n=word.size();
TrieNode *cur=root;
for(int i=0;i<n;++i){
int cid=word.at(i)-'a';
if(cur->sid!=-1) ans[cur->sid].push_back(bid);
if(cur->child[cid]==NULL) return ;
cur=cur->child[cid];
}
if(cur->sid!=-1) ans[cur->sid].push_back(bid);
}
vector<vector<int>> multiSearch(string big, vector<string>& smalls) {
int n=smalls.size(),m=big.size();
vector<vector<int>> ans(n,vector<int>{});
for(int i=0;i<n;++i){
if(smalls[i].size()==0) continue;
insert(smalls[i],i);
}
for(int i=0;i<m;++i){
string word=big.substr(i,m-i);
search(word,ans,i);
}
return ans;
}
}; 复杂度分析
|
class Solution {
public static int[][] multiSearch(String big, String[] smalls) {
int[][]multiSearch = new int[smalls.length][];
for(int i = 0;i < smalls.length; i++){
String small = smalls[i];
if("".equals(small)){
multiSearch[0] = new int[0];
return multiSearch;
}
int g =0;
int[] nu = new int[10000];
for(int j =0;j <= big.length()-small.length(); j++){
String mid = big.substring(j,j+small.length());
if(mid.contains(small)){
int index = j;
//multiSearch[i][g] = index;
nu[g] = index;
g++;
}
}
multiSearch[i] = new int[g];
for (int k = 0; k <multiSearch[i].length ; k++) {
multiSearch[i][k] = nu[k];
}
}
return multiSearch;
}
} |
class Trie:
def __init__(self, words):
self.root = {}
for word in words:
current = self.root
for char in word:
if char not in current:
current[char] = {}
current = current[char]
# mark the end
current['end'] = word
def search(self, word):
current = self.root
res = []
for char in word:
if char not in current:
break
current = current[char]
if 'end' in current:
res.append(current['end'])
return res
class Solution:
def multiSearch(self, big: str, smalls: List[str]) -> List[List[int]]:
trie = Trie(smalls)
matches = collections.defaultdict(list)
for i in range(len(big)):
current_matches = trie.search(big[i:])
for word in current_matches:
matches[word].append(i)
res = []
for word in smalls:
res.append(matches[word])
return res |
思路: type TrieNode struct{
children [26]*TrieNode
isEnd bool
id int
}
func (this *TrieNode) insert(word string, id int){
node := this
for _ , ch := range word{
ch -= 'a'
if node.children[ch] == nil{
node.children[ch] = &TrieNode{}
}
node = node.children[ch]
}
node.isEnd = true
node.id = id
}
func (this *TrieNode) search (word string) bool{
node := this
for _,ch := range word{
ch -= 'a'
if node.children[ch] == nil{
return false
}
node = node.children[ch]
}
return true
}
func multiSearch(big string, smalls []string) [][]int {
n := len(smalls)
trie := &TrieNode{}
out := make([][]int,n)
for i:=0;i<n;i++{
out[i] = make([]int,0)
}
for i,x := range smalls{
trie.insert(x,i)
}
for i:=0;i<len(big);i++{
node := trie
for j:=i;j<len(big);j++{
if node.children[big[j]-'a'] == nil{
break
}
node = node.children[big[j]-'a']
if node.isEnd{
out[node.id] = append(out[node.id],i)
}
}
}
return out
} 时间复杂度:O(N²) |
关键点代码
Java Code: class Solution {
public int[][] multiSearch(String big, String[] smalls) {
List<List<Integer>> list=new ArrayList<>();
for(int i=0;i<smalls.length;i++){
List<Integer> loc=new ArrayList<>();
//注意边界j>0&&j<=big.length()
for(int j=smalls[i].length();j>0&&j<=big.length();j++){if(smalls[i].equals(big.substring(j-smalls[i].length(),j))){loc.add(j-smalls[i].length());}}
list.add(new ArrayList<>(loc));
}
int[][] ans=new int[smalls.length][];
for(int k=0;k<smalls.length;k++){
ans[k]=new int[list.get(k).size()];
for(int p=0;p<list.get(k).size();p++){ans[k][p]=list.get(k).get(p);}
}
return ans;
}
} |
思路暴力解法 代码Java Code public int[][] multiSearch(String big, String[] smalls) {
int[][] res = new int[smalls.length][];
List<Integer> cur = new ArrayList<>();
for (int i = 0; i < smalls.length; i++) {
String small = smalls[i];
if (small.length() == 0) {
res[i] = new int[]{};
continue;
}
int startIdx = 0;
while (true) {
int idx = big.indexOf(small, startIdx);
if (idx == -1)
break;
cur.add(idx);
startIdx = idx + 1;
}
res[i] = new int[cur.size()];
for (int j = 0; j < res[i].length; j++)
res[i][j] = cur.get(j);
cur.clear();
}
return res;
} 复杂度
|
|
思路前缀树一开始只想到了把所有big字符串的后缀子串插入到前缀树里,但是感觉复杂度特别高,也想了把smalls生成前缀树但是没细想 前缀树的思路主要就是,
代码 def multiSearch(self, big: str, smalls: List[str]) -> List[List[int]]:
trie = {}
ans = [[] for _ in range(len(smalls))]
for i, small in enumerate(smalls):
cur = trie
for ch in small:
if ch not in cur:
cur[ch] = {}
cur = cur[ch]
cur['*'] = i
for i, ch in enumerate(big):
cur = trie
index = i
while ch in cur:
cur = cur[ch]
if '*' in cur:
ans[cur['*']] += [index]
i += 1
if i >= len(big):
break
ch = big[i]
return ans 复杂度n是big字符串长度 m是smalls的个数,(考虑smalls里最长串长度为t)
|
思路过滤敏感词(短句):把短句数组放到前缀树里,长句从i=0开始遍历,在Trie里查找str.substring(i)。如果可以查到,那么说明短句出现在长句的i位置。 代码class Solution {
public int[][] multiSearch(String big, String[] smalls) {
Trie trie = new Trie();
for (String word : smalls) {
trie.insert(word);
}
HashMap<String, List<Integer>> map = new HashMap<>(); // { 短句 -> [在长句中出现的位置] }
for (int i=0; i<big.length(); i++) {
String subStr = big.substring(i);
List<String> res = trie.search(subStr);
for (String word : res) {
if (!map.containsKey(word)) {
map.put(word, new ArrayList<>());
}
map.get(word).add(i);
}
}
int[][] ans = new int[smalls.length][];
for (int i=0; i<smalls.length; i++) {
String word = smalls[i];
List<Integer> res = map.get(word);
if (res == null) {
ans[i] = new int[0];
} else {
ans[i] = new int[res.size()];
for (int j=0; j<res.size(); j++) {
ans[i][j] = res.get(j);
}
}
}
return ans;
}
class Trie {
Trie[] children;
String word;
Trie() {
children = new Trie[26];
word = null;
}
public void insert(String word) {
Trie node = this;
for (int i=0; i<word.length(); i++) {
int idx = word.charAt(i) - 'a';
if (node.children[idx] == null) {
node.children[idx] = new Trie();
}
node = node.children[idx];
}
node.word = word;
}
public List<String> search(String word) {
Trie node = this;
List<String> res = new ArrayList<>();
for (int i=0; i<word.length(); i++) {
int idx = word.charAt(i) - 'a';
if (node.children[idx] == null) {
break;
}
node = node.children[idx];
if (node.word != null) {
res.add(node.word);
}
}
return res;
}
}
} TC: O(len(big) * len(smalls[i])) |
思路
代码//trie
class Solution {
private Node root = new Node();
public int[][] multiSearch(String big, String[] smalls) {
int n = smalls.length;
// 初始化结果集
List<Integer>[] res = new List[n];
for(int i = 0 ; i < n ; i++)
res[i] = new ArrayList<>();
// 建树
for(int i = 0 ; i < smalls.length; i++)
insert(smalls[i], i);
for(int i = 0 ; i < big.length(); i++){
Node tmp = root;
for(int j = i ; j < big.length(); j++){
//不存在以该串为prefix的敏感词
if(tmp.children[big.charAt(j) - 'a'] == null)
break;
tmp = tmp.children[big.charAt(j) - 'a'];
if(tmp.isWord)
res[tmp.id].add(i);
}
}
// 返回二维数组
int[][] ret = new int[n][];
for(int i = 0 ; i < n ; i++){
ret[i] = new int[res[i].size()];
for(int j = 0 ; j < ret[i].length; j++)
ret[i][j] = res[i].get(j);
}
return ret;
}
private void insert(String word, int id){
Node tmp = root;
for(int i = 0; i < word.length(); i++){
if(tmp.children[word.charAt(i) - 'a'] == null)
tmp.children[word.charAt(i) - 'a'] = new Node();
tmp = tmp.children[word.charAt(i) - 'a'];
}
tmp.isWord = true;
tmp.id = id;
}
class Node {
Node[] children;
boolean isWord;
int id;
public Node() {
children = new Node[26];
isWord = false;
id = 0;
}
}
} 复杂度time: O(N*K),k=敏感词最长单词长度,N长句长度 |
|
代码块var multiSearch = function (big, smalls) {
let result = Array(smalls.length)
.fill(0)
.map(() => []); //预生成数组
let maxLength = Math.max(...smalls.map((small) => small.length)); //计算smalls中字符串的最大长度
let trie = new Trie(); //预建trie树
//将smalls 字符串插入树中
for (let i = 0; i < smalls.length; i++) {
trie.insert(smalls[i]);
}
for (let i = 0; i < big.length; i++) {
if (!trie.startsWith(big[i])) continue; //查找是否以 big[i] 开头的字符串,没有则跳过本次执行
let str = big.slice(i, i + maxLength);
let node = trie.root;
for (let j = 0; j < str.length; j++) {
let jchar = str[j];
if (!node.children[jchar]) {
break;
}
node = node.children[jchar];
if (node.isword) {
result[node.index].push(i);
}
}
}
return result;
}; 时间复杂度和空间复杂度
|
|
Idea:create a trie for smalls. Iterate through all substring of big. If the substring is found in the trie, save the ID of the trieNode. ID is the corresponding position of each small in the smalls array. Complexity:Time: O(N^2) N is the length of big class Solution {
TrieNode root = new TrieNode();
public int[][] multiSearch(String big, String[] smalls) {
int m=smalls.length;
for(int i=0;i<m;i++) {
insert(smalls[i],i);
}
//use an array of arraylist to store results
List<Integer>[] res = new List[m];
for(int i=0;i<m;i++) {
res[i] = new ArrayList<Integer>();
}
//iterate over all substring of big, if the substring is in the trie, save the index
int n = big.length();
for(int i=0;i<n;i++) {
TrieNode curr=root;
for(int j=i;j<n;j++) {
char c = big.charAt(j);
if(curr.children[c-'a']==null)
break;
curr=curr.children[c-'a'];
if(curr.isWord==true) {
res[curr.ID].add(i);
}
}
}
//convert res to 2d array
int[][] ret = new int[m][];
for(int i=0;i<m;i++) {
ret[i]=new int[res[i].size()];
for(int j=0;j<res[i].size();j++) {
ret[i][j]=res[i].get(j);
}
}
return ret;
}
//insert new word into trie
private void insert(String word, int index) {
TrieNode curr = root;
for(int i=0;i<word.length();i++) {
char c = word.charAt(i);
if(curr.children[c-'a']==null) {
curr.children[c-'a'] = new TrieNode();
}
curr=curr.children[c-'a'];
}
curr.ID=index;
curr.isWord=true;
}
private class TrieNode {
TrieNode[] children;
int ID; //store the index of smalls
boolean isWord;
public TrieNode() {
children = new TrieNode[26];
isWord = false;
ID=-1;
}
}
} |
Idea:hash class Solution:
def multiSearch(self, big: str, smalls: List[str]) -> List[List[int]]:
ans = []
seen = {}
for i in range(len(big)):
for j in range(i+1):
if big[j:i+1] in seen:
seen[big[j:i+1] ].append(j)
else:
seen[big[j:i+1]] = [j]
for i in smalls:
if i in seen:
ans.append(seen[i])
else:
ans.append([])
return ans Complexity:Time: O(N^2) |
class Solution {
Node root = new Node();
static class Node{
Node[] children;
boolean isEnd;
int id;
public Node(){
children = new Node[26];
}
}
public int[][] multiSearch(String big, String[] smalls) {
int[][] res = new int[smalls.length][];
for (int i = 0; i < smalls.length; i++) {
String small = smalls[i];
insert(small, i);
}
ArrayList<Integer>[] list = new ArrayList[res.length];
for (int i = 0; i < list.length; i++) {
list[i] = new ArrayList<>();
}
for (int i = 0; i < big.length(); i++) {
Node temp = root;
for (int j = i; j < big.length(); j++) {
int ch = big.charAt(j) - 'a';
if(temp.children[ch] == null) break;
temp = temp.children[ch];
if(temp.isEnd) {
list[temp.id].add(i);
}
}
}
for (int i = 0; i < res.length; i++) {
res[i] = list[i].stream().mapToInt(Integer::intValue).toArray();
}
return res;
}
public void insert(String small, int id) {
Node temp = root;
for (int i = 0; i < small.length(); i++) {
int ch = small.charAt(i) - 'a';
if(temp.children[ch] == null) {
temp.children[ch] = new Node();
}
temp = temp.children[ch];
}
temp.isEnd = true;
temp.id = id;
}
} |
public class Solution {
public int[][] multiSearch(String article, String[] sensitive) {
Map<String, List<Integer>> statistics = new LinkedHashMap<>();
Trie trie = new Trie();
for (int i = 0; i < sensitive.length; i++) {
trie.insert(sensitive[i]);
statistics.put(sensitive[i], new LinkedList<>());
}
int length = article.length();
int start = 0;
int end = 0;
while (start < length) {
//subString为[x,y)
String word = article.substring(start, end + 1);
if (!trie.searchWith(word)) {
end = ++start;
}
else {
if (statistics.containsKey(word))
statistics.get(word).add(start);
end++;
}
if (end > length - 1) {
end = ++start;
}
}
int[][] res = new int[sensitive.length][];
int rIndex = 0;
for (Map.Entry<String, List<Integer>> entry : statistics.entrySet()) {
int size = entry.getValue().size();
int[] tmp = new int[size];
for (int i = 0; i < size; i++) {
tmp[i] = entry.getValue().get(i);
}
res[rIndex++] = tmp;
}
return res;
}
class Trie {
TrieNode root;
Trie() {
root = new TrieNode();
}
public void insert(String word) {
TrieNode cur = root;
for (int i = 0; i < word.length(); i++) {
int cIndex = word.charAt(i) - 'a';
if (cur.children[cIndex] == null)
cur.children[cIndex] = new TrieNode();
cur = cur.children[cIndex];
}
cur.isEnd = true;
}
public boolean searchWith(String word) {
TrieNode cur = root;
for (int i = 0; i < word.length(); i++) {
int cIndex = word.charAt(i) - 'a';
if (cur.children[cIndex] == null)
return false;
cur = cur.children[cIndex];
}
return true;
}
}
class TrieNode {
boolean isEnd = false;
TrieNode[] children = new TrieNode[26];
}
} |
class Solution1 {
private:
TrieNode *root = new TrieNode();
void insert(string word, int s){
TrieNode *p = root;
for (int i = 0; i < word.size(); ++i){
int t = word[i] - 'a';
if (p->next[t] == nullptr) {
p->next[t] = new TrieNode();
}
p = p->next[t];
}
p->val = s;
}
void search(string word, vector<vector<int>>& res, int bid){
TrieNode *cur = root;
for (int i = 0; i < word.size(); ++i){
int t = word[i] - 'a';
if (cur->val != -1) {
res[cur->val].push_back(bid);
}
if (cur->next[t] == nullptr) {
return;
}
cur = cur->next[t];
}
if (cur->val != -1) {
res[cur->val].push_back(bid);
}
}
public:
vector<vector<int>> multiSearch(string big, vector<string>& smalls) {
vector<vector<int>> res(smalls.size(), vector<int>{});
for (int i = 0; i < smalls.size(); ++i){
if (smalls[i].size() == 0) {
continue;
}
insert(smalls[i], i);
}
for (int i = 0; i < big.size(); ++i){
string word = big.substr(i, big.size() - i);
search(word, res, i);
}
return res;
}
}; |
// 利用js indexof暴力匹配 var multiSearch = function (big, smalls) {
var positions = [];
for (let i = 0; i < smalls.length; i++) {
positions[i] = [];
if (smalls[i] == "") continue;
var k = 0;
while (big.indexOf(smalls[i], k) >= 0) {
var pos = big.indexOf(smalls[i], k);
positions[i].push(pos);
if (k < big.length - 1)
k = pos + 1;
else
break;
}
}
return positions;
}; 时间复杂度:O(N*K) |
|
面试题 17.17. 多次搜索// 11-23 trie 模仿还得再研究研究
class Solution {
struct Trie {
Trie* next[26];
int idx = -1;
};
public:
vector<vector<int>> multiSearch(string big, vector<string>& smalls) {
root = new Trie();
vector<vector<int>> res(smalls.size(), vector<int> {});
for (int i = 0; i < smalls.size(); ++i) {
insert(smalls[i], i);
}
search(big, res);
return res;
}
private:
Trie* root;
void insert(string s, int idx) {
Trie* p = root;
for (auto &a : s) {
int i = a - 'a';
if (p->next[i] == NULL) {
p->next[i] = new Trie();
}
p = p->next[i];
}
p->idx = idx;
}
void search(string s, vector<vector<int>>& res) {
for (int i = 0; i < s.size(); ++i) {
Trie* p = root;
int j = i;
while (j < s.size()) {
int k = s[j] - 'a';
if (p->next[k] == NULL) {
break;
}
p = p->next[k];
++j;
if (p->idx != -1) {
res[p->idx].push_back(i);
}
}
}
}
}; |
Codes var multiSearch = function(big, smalls) {
let result=[]
function match(big,item){
let res=[];
if(item==''||item.length>big.length){
return res
}
let index=big.indexOf(item)
while(index>-1){
res.push(index)
let ary=big.split('')
ary.splice(index,1,'#')
big=ary.join('')
index=big.indexOf(item)
}
return res
}
for(let i of smalls){
result.push(match(big,i))
}
return result
}; |
class Solution {
public int[][] multiSearch(String big, String[] smalls) {
List<List<Integer>> list=new ArrayList<>();
for(int i=0;i<smalls.length;i++){
List<Integer> loc=new ArrayList<>();
for(int j=smalls[i].length();j>0&&j<=big.length();j++){if(smalls[i].equals(big.substring(j-smalls[i].length(),j))){loc.add(j-smalls[i].length());}}
list.add(new ArrayList<>(loc));
}
int[][] ans=new int[smalls.length][];
for(int k=0;k<smalls.length;k++){
ans[k]=new int[list.get(k).size()];
for(int p=0;p<list.get(k).size();p++){ans[k][p]=list.get(k).get(p);}
}
return ans;
}
} |
func multiSearch(big string, smalls []string) [][]int {
sa := suffixarray.New([]byte(big))
pos := make([][]int, 0, len(smalls))
for _, s := range smalls {
// 从后往前找s
ps := sa.Lookup([]byte(s), -1)
// 从小到大排序
sort.Ints(ps)
pos = append(pos, ps)
}
return pos
} |
def multiSearch(self, big: str, smalls: List[str]) -> List[List[int]]:
trie = {}
ans = [[] for _ in range(len(smalls))]
for i, small in enumerate(smalls):
cur = trie
for ch in small:
if ch not in cur:
cur[ch] = {}
cur = cur[ch]
cur['*'] = i
for i, ch in enumerate(big):
cur = trie
index = i
while ch in cur:
cur = cur[ch]
if '*' in cur:
ans[cur['*']] += [index]
i += 1
if i >= len(big):
break
ch = big[i]
return ans |
class Solution {
} |
思路:前缀树
时间复杂度:O(N*K) ,K 是敏感词中最长单词长度,N 是长句的长度。 |
解题思路前缀树。参考官方题解。将所有在smalls中的word插入到前缀树中,然后我们每次改变起始字母搜索big中的子字符串,判断是否在前缀树中,如果存在,我们就放在结果数组中。 代码class Solution {
class TrieNode {
TrieNode[] children;
boolean isWord;
int id;
public TrieNode() {
children = new TrieNode[26];
isWord = false;
id = 0;
}
}
private TrieNode root = new TrieNode();
public int[][] multiSearch(String big, String[] smalls) {
int n = smalls.length;
// initialize res array
List<Integer>[] res = new List[n];
for (int i = 0; i < n; i++) {
res[i] = new ArrayList<>();
}
// build a trie, insert each word in smalls and use the index in smalls as id
for (int i = 0; i < n; i++) {
insert(smalls[i], i);
}
// search in big, change the starting letter each time and search whether substring exists in smalls
for (int i = 0; i < big.length(); i++) {
TrieNode node = root;
for (int j = i; j < big.length(); j++) {
char letter = big.charAt(j);
if (node.children[letter - 'a'] == null) {
break;
}
node = node.children[letter - 'a'];
if (node.isWord) {
res[node.id].add(i);
}
}
}
// convert to 2D array for output
int[][] ret = new int[n][];
for (int i = 0; i < n; i++) {
ret[i] = new int[res[i].size()];
for (int j = 0; j < ret[i].length; j++) {
ret[i][j] = res[i].get(j);
}
}
return ret;
}
private void insert(String word, int id) {
TrieNode node = root;
for (int i = 0; i < word.length(); i++) {
char letter = word.charAt(i);
if (node.children[letter - 'a'] == null) {
node.children[letter - 'a'] = new TrieNode();
}
node = node.children[letter - 'a'];
}
node.isWord = true;
node.id = id;
}
} 复杂度分析
|
class Trie:
def __init__(self, words):
self.d = {}
for i in range(len(words)):
tree = self.d
for char in words[i]:
if char not in tree:
tree[char] = {}
tree = tree[char]
tree['end'] = i
def search(self, s):
tree = self.d
res = []
for char in s:
if char not in tree:
return res
tree = tree[char]
if 'end' in tree:
res.append(tree['end'])
return res
class Solution:
def multiSearch(self, big: str, smalls: List[str]) -> List[List[int]]:
trie = Trie(smalls)
res = [[] for _ in range(len(smalls))]
for i in range(len(big)):
tmp = trie.search(big[i:])
for idx in tmp:
res[idx].append(i)
return res |
class Solution { public:
private:
}; |
思路参考官网: 代码``
复杂度分析时间复杂度:O(n∗m),n=len(big),m=len(smalls) 空间复杂度:O(m∗k),k=max(len(smalls[i])),即k是smalls中最长字符串的长度 |
class Trie:
def __init__(self, words):
self.d = {}
for word in words:
t = self.d
for w in word:
if w not in t:
t[w] = {}
t = t[w]
t['end'] = word
def search(self, s):
t = self.d
res = []
for w in s:
if w not in t:
break
t = t[w]
if 'end' in t:
res.append(t['end'])
return res
class Solution:
def multiSearch(self, big: str, smalls: List[str]) -> List[List[int]]:
trie = Trie(smalls)
hit = collections.defaultdict(list)
for i in range(len(big)):
matchs = trie.search(big[i:])
for word in matchs:
hit[word].append(i)
res = []
for word in smalls:
res.append(hit[word])
return res |
多次搜索思路字典树(评论区) 代码class Trie:
def __init__(self, words):
self.d = {}
for i in range(len(words)):
tree = self.d
for char in words[i]:
if char not in tree:
tree[char] = {}
tree = tree[char]
tree['end'] = i
def search(self, s):
tree = self.d
res = []
for char in s:
if char not in tree:
return res
tree = tree[char]
if 'end' in tree:
res.append(tree['end'])
return res
class Solution:
def multiSearch(self, big: str, smalls: List[str]) -> List[List[int]]:
trie = Trie(smalls)
res = [[] for _ in range(len(smalls))]
for i in range(len(big)):
tmp = trie.search(big[i:])
for idx in tmp:
res[idx].append(i)
return res |
思路
把敏感词 smalls 的数量记为 t,把敏感词里最长的字符串长度记为 k,把长句子 big 的长度记为 b。 具体步骤: 1)把这堆敏感词建成一颗 Trie 树,时间复杂度是 O(tk)。 2)遍历长句子的每一个字母,检查“以该字母作为起点”的话,是否可以在 trie 中找到结果。时间复杂度是 O(bk) 综上,总的时间复杂度是 O(tk + bk)。在这种题目场景下这种 trie 的思路应该就是时间复杂度最好的答案了。 class Solution {
private:
struct node
{
int idx;
vector<node*> children;
node() : idx(-1), children(26, nullptr){}
};
struct Trie
{
node* root;
Trie():root(new node()){}
void insert(string& word, int idx)
{
node* p = root;
for(auto& w:word)
{
w-='a';
if(p->children[w]==nullptr)
{
p->children[w] = new node();
}
p = p->children[w];
}
p->idx = idx;
}
};
public:
vector<vector<int>> multiSearch(string big, vector<string>& smalls)
{
int big_N = big.length();
int small_N = smalls.size();
vector<vector<int>> res(small_N);
Trie trie = Trie();
for(int i=0; i<small_N; i++)
{
trie.insert(smalls[i], i);
}
for(int i=0; i<big_N; i++)
{
int j = i;
node* p = trie.root;
while(j<big_N && p->children[big[j]-'a'])
{
p = p ->children[big[j]-'a'];
if(p->idx != -1)
res[p->idx].push_back(i);
j++;
}
}
return res;
}
}; Complexity:Time:O(n^2) |
思路前缀树 代码class Trie:
def __init__(self, words):
self.d = {}
for word in words:
t = self.d
for w in word:
if w not in t:
t[w] = {}
t = t[w]
t['end'] = word
def search(self, s):
t = self.d
res = []
for w in s:
if w not in t:
break
t = t[w]
if 'end' in t:
res.append(t['end'])
return res
class Solution:
def multiSearch(self, big: str, smalls: List[str]) -> List[List[int]]:
trie = Trie(smalls)
hit = collections.defaultdict(list)
for i in range(len(big)):
matchs = trie.search(big[i:])
for word in matchs:
hit[word].append(i)
res = []
for word in smalls:
res.append(hit[word])
return res 复杂度
|
|
class Solution {
} |
class Solution {
} |
//以smalls数组构建字典树,每个长串的后缀字符串去查找结果 class Solution { |
思路根据敏感词数组构建一颗trie树,然后遍历big的所有子串,如果子串是敏感词就记录下开始位置,最后返回结果 class Solution {
Node root = new Node();
public int[][] multiSearch(String big, String[] smalls) {
int[][] ans = new int[smalls.length][];
Map<Integer,List<Integer>> ansMap = new HashMap<>();
for(int idx = 0;idx<smalls.length;idx++){
insert(smalls[idx],idx);
}
for(int i=0;i<big.length();i++){
Node cur = root;
for(int j=i;j<big.length();j++){
int jIdx = big.charAt(j) - 'a';
if(cur.children[jIdx] == null)break;
cur = cur.children[jIdx];
if(cur.isWord){
ansMap.putIfAbsent(cur.id, new ArrayList<>());
ansMap.get(cur.id).add(i);
}
}
}
for(int idx = 0;idx<smalls.length;idx++){
List<Integer> list = ansMap.getOrDefault(idx,new ArrayList<>());
int[] arr = new int[list.size()];
for(int k=0;k<arr.length;k++){
arr[k] = list.get(k);
}
ans[idx] = arr;
}
return ans;
}
void insert(String word,int id){
Node cur = root;
for(int i=0;i<word.length();i++){
int idx = word.charAt(i) - 'a';
if(cur.children[idx] == null){
cur.children[idx] = new Node();
}
cur = cur.children[idx];
}
cur.isWord = true;
cur.id = id;
}
private static class Node{
int id;
boolean isWord = false;
Node[] children = new Node[26];
}
} big长度为n, smalls最长字符串为k
|
【Day 75】面试题 17.17 多次搜索代码class Solution {
private:
struct node
{
int idx;
vector<node*> children;
node() : idx(-1), children(26, nullptr){}
};
struct Trie
{
node* root;
Trie():root(new node()){}
void insert(string& word, int idx)
{
node* p = root;
for(auto& w:word)
{
w-='a';
if(p->children[w]==nullptr)
{
p->children[w] = new node();
}
p = p->children[w];
}
p->idx = idx;
}
};
public:
vector<vector<int>> multiSearch(string big, vector<string>& smalls)
{
int big_N = big.length();
int small_N = smalls.size();
vector<vector<int>> res(small_N);
Trie trie = Trie();
for(int i=0; i<small_N; i++)
{
trie.insert(smalls[i], i);
}
for(int i=0; i<big_N; i++)
{
int j = i;
node* p = trie.root;
while(j<big_N && p->children[big[j]-'a'])
{
p = p ->children[big[j]-'a'];
if(p->idx != -1)
res[p->idx].push_back(i);
j++;
}
}
return res;
}
}; 复杂度时间复杂度:O(N^2) 空间复杂度:O(N^2) |
思路对smalls 构造trie树,对big的每个子串查询 代码class TrieNode:
def __init__(self):
self.children = {}
self.isEnd = False
class Trie:
def __init__(self):
self.root = TrieNode()
def insert(self,word:str):
node = self.root
for ch in word:
if ch not in node.children:
node.children[ch] = TrieNode()
node = node.children[ch]
node.isEnd = True
def search(self,word:str):
node = self.root
for ch in word:
if ch not in node.children:
return False
node = node.children[ch]
return node.isEnd
class Solution:
def multiSearch(self, big: str, smalls: List[str]) -> List[List[int]]:
trie = Trie()
res = {}
for word in smalls:
trie.insert(word)
res[word] = []
for i in range(len(big)):
for j in range(i,len(big)):
if trie.search(big[i:j+1]):
res[big[i:j+1]].append(i)
return list(res.values()) 复杂度
|
class Solution {
public int[][] multiSearch(String big, String[] smalls) {
List<List<Integer>> list=new ArrayList<>();
for(int i=0;i<smalls.length;i++){
List<Integer> loc=new ArrayList<>();
//注意边界j>0&&j<=big.length()
for(int j=smalls[i].length();j>0&&j<=big.length();j++){if(smalls[i].equals(big.substring(j-smalls[i].length(),j))){loc.add(j-smalls[i].length());}}
list.add(new ArrayList<>(loc));
}
int[][] ans=new int[smalls.length][];
for(int k=0;k<smalls.length;k++){
ans[k]=new int[list.get(k).size()];
for(int p=0;p<list.get(k).size();p++){ans[k][p]=list.get(k).get(p);}
}
return ans;
}
} |
Note
Solutionclass TrieNode:
def __init__(self):
self.id = 0 # idx in smalls
self.children = {}
self.is_word = False
class Solution:
def multiSearch(self, big: str, smalls: List[str]) -> List[List[int]]:
root = TrieNode()
# Build trie with smalls
for i in range(len(smalls)):
node = root
for c in smalls[i]:
if c not in node.children:
node.children[c] = TrieNode()
node = node.children[c]
node.is_word = True
node.id = i
# start search with big
res = [[] for _ in range(len(smalls))]
for i in range(len(big)):
node = root
for j in range(i, len(big)):
if big[j] not in node.children:
break
node = node.children[big[j]]
if node.is_word:
res[node.id].append(i)
return res Time complexity: O((N+L) * K) where N=len(smalls), L=len(big), K=avg small word length Space complexity: O(NK) Trie tree's worst case space |
Title:面试题 17.17. 多次搜索Question Reference LeetCodeSolutionBuild a trie using small, the trie will also store the index of the string in small. Codeclass Solution {
public:
struct trie {
int index;
vector<trie*> children;
trie() {
index = -1;
children = vector<trie*>(26, NULL);
}
};
vector<vector<int>> multiSearch(string big, vector<string>& smalls) {
trie* tree = new trie();
// Build trie
for(int i = 0; i < smalls.size(); i++) {
trie* node = tree;
for (char ch : smalls[i]) {
if (node->children[ch - 'a'] == NULL)
node->children[ch - 'a'] = new trie();
node = node->children[ch - 'a'];
}
node->index = i;
}
vector<vector<int>> ans = vector<vector<int>>(smalls.size(), vector<int>());
// Search
for (int i = 0; i < big.length(); i++) {
trie* node = tree;
for (int j = i; j < big.length(); j++) {
if (node->children[big[j] - 'a'] == NULL)
break;
node = node->children[big[j] - 'a'];
if (node->index != -1)
ans[node->index].push_back(i);
}
}
return ans;
}
};
ComplexityTime Complexity and ExplanationO(n*m): n is the length of big, since for each letter, it is potentially to compare with the whole trie tree. Space Complexity and ExplanationO(m): there will only be 26 letters, m is the max length of the string in small |
Problemhttps://leetcode-cn.com/problems/multi-search-lcci/ Notes
Solutionclass Solution {
public int[][] multiSearch(String big, String[] smalls) {
// create a trie
TrieNode root = new TrieNode();
// insert all the words in the smalls array to the trie
for(int i = 0; i < smalls.length; i++){
String word = smalls[i];
TrieNode node = root;
for(char c: word.toCharArray()){
if(node.children[c - 'a'] == null){
node.children[c - 'a'] = new TrieNode();
}
node = node.children[c - 'a'];
}
node.isWord = true;
node.index = i;
}
// search each substring of the big in the trie
List<Integer>[] search = new List[smalls.length];
for(int i = 0; i < big.length(); i++){
String key = big.substring(i);
TrieNode node = root;
for(char c: key.toCharArray()){
if(node.children[c - 'a'] == null){
break;
}
node = node.children[c - 'a'];
if(node.isWord){
if(search[node.index] == null){
search[node.index] = new ArrayList<>();
}
search[node.index].add(i);
}
}
}
// convert the search result to final result
int[][] res = new int[search.length][];
for(int i = 0; i < search.length; i++){
if(search[i] == null){
res[i] = new int[0];
}else{
res[i] = new int[search[i].size()];
for(int j = 0; j < search[i].size(); j++){
res[i][j] = search[i].get(j);
}
}
}
return res;
}
}
class TrieNode{
int index;
boolean isWord;
TrieNode[] children;
public TrieNode(){
this.index = 0;
this.isWord = false;
this.children = new TrieNode[26];
}
} Complexity
|
Trie 树这道题我们可以把small 放进trie里,每个词最后做个标记。然后遍历big,big需要遍历每个可能的substring (big[0:], big[1:], big[2:] ...),如果发现当前substring 是个词,那么加到ans里,最后处理ans即可。 class Trie:
def __init__(self):
self.root = {}
class Solution:
def multiSearch(self, big: str, smalls: List[str]) -> List[List[int]]:
trie = Trie()
for word in smalls:
cur = trie.root
for char in word:
if char in cur:
cur = cur[char]
else:
cur[char] = {}
cur = cur[char]
cur["*"] = word
ans = collections.defaultdict(list)
for i in range(len(big)):
cur = trie.root
for innerChar in big[i:]:
if innerChar in cur:
cur = cur[innerChar]
if "*" in cur:
ans[cur["*"]].append(i)
else:
break
ret = []
for i in smalls:
ret.append(ans[i])
return ret 时间: |
面试题 17.17 多次搜索
入选理由
暂无
题目地址
https://leetcode-cn.com/problems/multi-search-lcci
前置知识
题目描述
The text was updated successfully, but these errors were encountered: