How do I speed up recursive search function?
我写的搜索功能的速度有问题。功能步骤如下:
该函数的目标是追踪表与表之间的关联,这种关联可以是直接的,也可以相隔多层。递归深度是一个固定的整数值。
我的问题是,每当我以两层搜索深度运行这个函数(现阶段还不敢尝试更深),作业要么耗尽内存,要么慢到让我失去耐心。有一次我等了 17 分钟,作业才因内存不足而终止。
每个表的平均列数为28,标准差为34。
下面是一个图表,显示了可以在表之间建立的各种链接的示例:
。
这是我的代码:
/// <summary>
/// Recursively attaches to <paramref name="parentNode"/> every entry of
/// <paramref name="sourceList"/> that belongs to a different table and whose column
/// name, with its first character removed, matches one of the parent's columns.
/// </summary>
/// <param name="sourceList">Flat list of table/column pairs to search.</param>
/// <param name="parentNode">Node whose table is being expanded.</param>
/// <param name="targetTable">Table name that ends a search branch when reached.</param>
/// <param name="maxSearchDepth">Nodes at this level or deeper are not expanded.</param>
private void FindLinkingTables(List<TableColumns> sourceList, TableSearchNode parentNode, string targetTable, int maxSearchDepth)
{
    // Depth limit reached: leave this node unexpanded.
    if (parentNode.Level >= maxSearchDepth)
    {
        return;
    }

    IEnumerable<string> parentColumns =
        from entry in sourceList
        where entry.Table.Equals(parentNode.Table)
        select entry.Column;

    foreach (string sourceColumn in parentColumns)
    {
        // Columns are matched on their name without the first character.
        string shortName = sourceColumn.Substring(1);

        // Same short name, not the parent's own table, and not a table already
        // present among the parent's ancestors.
        IEnumerable<TableSearchNode> matches =
            from entry in sourceList
            where entry.Column.Substring(1).Equals(shortName)
                  && !entry.Table.Equals(parentNode.Table)
                  && !parentNode.Ancenstory.Contains(entry.Table)
            select new TableSearchNode
            {
                Table = entry.Table,
                Column = entry.Column,
                Level = parentNode.Level + 1
            };

        foreach (TableSearchNode match in matches)
        {
            parentNode.AddChildNode(sourceColumn, match);

            if (match.Table.Equals(targetTable))
            {
                match.NotifySeachResult(true);
            }
            else
            {
                FindLinkingTables(sourceList, match, targetTable, maxSearchDepth);
            }
        }
    }
}
编辑:分离表searchnode逻辑并添加完整性的属性和方法
// Member of TableSearchNode.
/// <summary>Child nodes grouped by the reference column they were attached under.</summary>
public Dictionary<string, List<TableSearchNode>> Children { get; private set; }

// Member of TableSearchNode.
/// <summary>
/// The <c>tbl</c> value of every ancestor of this node, ordered from the most distant
/// ancestor down to the direct parent. A new list is built on every access.
/// </summary>
public List<string> Ancenstory
{
    get
    {
        List<string> chain = new List<string>();

        // Walk upwards (direct parent first) ...
        for (TableSearchNode node = ParentNode; node != null; node = node.ParentNode)
        {
            chain.Add(node.tbl);
        }

        // ... then flip so the most distant ancestor comes first.
        chain.Reverse();
        return chain;
    }
}

// Member of TableSearchNode.
/// <summary>
/// Makes this node the parent of <paramref name="childNode"/> and files the child under
/// <paramref name="referenceColumn"/> in <see cref="Children"/>.
/// </summary>
/// <param name="referenceColumn">Key under which the child is stored.</param>
/// <param name="childNode">Node to attach.</param>
public void AddChildNode(string referenceColumn, TableSearchNode childNode)
{
    childNode.ParentNode = this;

    List<TableSearchNode> bucket;
    if (!Children.TryGetValue(referenceColumn, out bucket) || bucket == null)
    {
        bucket = new List<TableSearchNode>();
        Children.Add(referenceColumn, bucket);
    }

    bucket.Add(childNode);
}
号
提前感谢您的帮助!
你确实浪费了大量内存。我立刻能想到的有以下几点:
首先,用按表名建立索引的 ILookup 代替直接在 List 上反复过滤:
// Group the flat list by table name once, then run the search against the lookup.
ILookup<string, TableColumns> sourceLookup = sourceList.ToLookup(entry => entry.Table);
FindLinkingTables(sourceLookup, parentNode, targetTable, maxSearchDepth);
如果不需要,就不要调用(此处的方法名在提取时丢失):
/// <summary>
/// Recursively attaches to <paramref name="parentNode"/> every column from another,
/// not yet visited table whose name (minus its first character) matches one of the
/// parent's columns.
/// </summary>
/// <param name="sourceLookup">Table/column pairs grouped by table name.</param>
/// <param name="parentNode">Node whose table is being expanded.</param>
/// <param name="targetTable">Table name that ends a search branch when reached.</param>
/// <param name="maxSearchDepth">Nodes at this level or deeper are not expanded.</param>
private void FindLinkingTables(ILookup<string, TableColumns> sourceLookup, TableSearchNode parentNode, string targetTable, int maxSearchDepth)
{
    if (parentNode.Level >= maxSearchDepth)
    {
        return;
    }

    // Tables that must not be linked again: the parent itself and all of its ancestors.
    // Ancenstory is read once per call instead of once per lookup group per column.
    var excludedTables = new HashSet<string>(parentNode.Ancenstory);
    excludedTables.Add(parentNode.Table);

    // The set of candidate columns does not depend on the column being matched,
    // so it is built once and reused for every column of the parent table.
    List<TableColumns> candidateColumns = sourceLookup
        .Where(group => !excludedTables.Contains(group.Key))
        .SelectMany(group => group)
        .ToList();

    foreach (string sourceColumn in sourceLookup[parentNode.Table].Select(x => x.Column))
    {
        // Columns are matched on their name without the first character.
        string shortName = sourceColumn.Substring(1);

        var tables = candidateColumns
            .Where(tableColumn => tableColumn.Column.Substring(1).Equals(shortName))
            .Select(x => new TableSearchNode
            {
                Table = x.Table,
                Column = x.Column,
                Level = parentNode.Level + 1
            });

        foreach (TableSearchNode table in tables)
        {
            parentNode.AddChildNode(sourceColumn, table);

            if (!table.Table.Equals(targetTable))
            {
                FindLinkingTables(sourceLookup, table, targetTable, maxSearchDepth);
            }
            else
            {
                table.NotifySeachResult(true);
            }
        }
    }
}
号
[编辑]
此外,为了加速剩余的复杂 LINQ 查询,您还可以再准备一个按列的短名称(去掉首字符后的列名)建立索引的 ILookup:
// Second index: the same entries, grouped by column name without its first character.
ILookup<string, TableColumns> sourceColumnLookup = sourceList
    .ToLookup(t => t.Column.Substring(1));

//...

/// <summary>
/// Recursively attaches to <paramref name="parentNode"/> every column that shares a
/// short name with one of the parent's columns and belongs to a table that is neither
/// the parent's table nor one of its ancestors.
/// </summary>
/// <param name="sourceLookup">Table/column pairs grouped by table name.</param>
/// <param name="sourceColumnLookup">The same pairs grouped by column name minus its first character.</param>
/// <param name="parentNode">Node whose table is being expanded.</param>
/// <param name="targetTable">Table name that ends a search branch when reached.</param>
/// <param name="maxSearchDepth">Nodes at this level or deeper are not expanded.</param>
private void FindLinkingTables(
    ILookup<string, TableColumns> sourceLookup,
    ILookup<string, TableColumns> sourceColumnLookup,
    TableSearchNode parentNode,
    string targetTable,
    int maxSearchDepth)
{
    if (parentNode.Level >= maxSearchDepth)
    {
        return;
    }

    var tableColumns = sourceLookup[parentNode.Table].Select(x => x.Column);

    foreach (string sourceColumn in tableColumns)
    {
        string shortName = sourceColumn.Substring(1);

        // Only entries with the same short name are inspected.
        var tables = sourceColumnLookup[shortName]
            .Where(tableColumn => !tableColumn.Table.Equals(parentNode.Table)
                                  && !parentNode.AncenstoryReversed.Contains(tableColumn.Table))
            .Select(x => new TableSearchNode
            {
                Table = x.Table,
                Column = x.Column,
                Level = parentNode.Level + 1
            });

        foreach (TableSearchNode table in tables)
        {
            parentNode.AddChildNode(sourceColumn, table);

            if (!table.Table.Equals(targetTable))
            {
                FindLinkingTables(sourceLookup, sourceColumnLookup, table, targetTable, maxSearchDepth);
            }
            else
            {
                table.NotifySeachResult(true);
            }
        }
    }
}
我看了一下你的 Ancenstory 属性。如果
/// <summary>
/// The ancestors' <c>tbl</c> values, ordered from the most distant ancestor down to
/// the direct parent.
/// </summary>
public IEnumerable<string> AncenstoryEnum
{
    get
    {
        IEnumerable<string> nearestFirst = AncenstoryReversed;
        return nearestFirst.Reverse();
    }
}

/// <summary>
/// Lazily yields the ancestors' <c>tbl</c> values, starting with the direct parent and
/// following <c>ParentNode</c> until it is null.
/// </summary>
public IEnumerable<string> AncenstoryReversed
{
    get
    {
        for (TableSearchNode node = ParentNode; node != null; node = node.ParentNode)
        {
            yield return node.tbl;
        }
    }
}
。
好的,这里有一个答案,它基本上放弃了你发布的所有代码。
首先,您应该获取您的
为此,我编写了一个名为 TableColumnIndexer 的类:
/// <summary>
/// Index of table names to the set of column names registered for each table.
/// </summary>
class TableColumnIndexer
{
    // Table name -> distinct column names added for that table.
    Dictionary<string, HashSet<string>> tables = new Dictionary<string, HashSet<string>>();

    /// <summary>Registers <paramref name="columnName"/> under <paramref name="tableName"/>.</summary>
    public void Add(string tableName, string columnName)
    {
        this.Add(new TableColumns { Table = tableName, Column = columnName });
    }

    /// <summary>
    /// Registers the column of <paramref name="tableColumns"/> under its table, creating
    /// the table's column set on first use.
    /// </summary>
    public void Add(TableColumns tableColumns)
    {
        // Single dictionary lookup instead of ContainsKey + Add + indexer.
        HashSet<string> columns;
        if (!tables.TryGetValue(tableColumns.Table, out columns))
        {
            columns = new HashSet<string>();
            tables.Add(tableColumns.Table, columns);
        }

        columns.Add(tableColumns.Column);
    }

    // .... More code to follow
现在,一旦将所有表/列值注入到这个索引类中,就可以调用递归方法来检索两个表之间最短的祖先链接。这里的实现有些草率,但它是为了在这一点上清楚地说明性能:
// .... continuation of TableColumnIndexer class

    /// <summary>
    /// Searches for the shortest chain of names leading from <paramref name="parentName"/>
    /// to <paramref name="targetName"/>.
    /// </summary>
    /// <param name="parentName">Table to start from.</param>
    /// <param name="targetName">Name to reach.</param>
    /// <param name="maxDepth">Depth limit; the recursion is allowed depths 0 through maxDepth - 1.</param>
    /// <returns>
    /// The chain starting with <paramref name="parentName"/> and ending with
    /// <paramref name="targetName"/>, or null when no chain is found within the limit.
    /// </returns>
    public List<string> GetShortestAncestry(string parentName, string targetName, int maxDepth)
    {
        return GetSortestAncestryR(parentName, targetName, maxDepth - 1, 0, new Dictionary<string, int>());
    }

    /// <summary>
    /// Recursive worker for <see cref="GetShortestAncestry"/>.
    /// </summary>
    /// <param name="currentName">Table being expanded.</param>
    /// <param name="targetName">Name to reach.</param>
    /// <param name="maxDepth">Largest depth at which a table is still expanded.</param>
    /// <param name="currentDepth">Depth of <paramref name="currentName"/> in this branch.</param>
    /// <param name="vistedTables">Shallowest depth at which each table has been reached so far.</param>
    /// <returns>The chain from <paramref name="currentName"/> to the target, or null.</returns>
    private List<string> GetSortestAncestryR(string currentName, string targetName, int maxDepth, int currentDepth, Dictionary<string, int> vistedTables)
    {
        // A table already reached at a shallower depth cannot give a shorter chain from here.
        int shallowestVisit;
        if (vistedTables.TryGetValue(currentName, out shallowestVisit) && shallowestVisit < currentDepth)
        {
            return null;
        }
        vistedTables[currentName] = currentDepth;

        if (currentDepth > maxDepth)
        {
            return null;
        }

        // A name that was never registered as a table has nothing to expand.
        // (Indexing the dictionary directly would throw KeyNotFoundException here.)
        HashSet<string> columns;
        if (!tables.TryGetValue(currentName, out columns))
        {
            return null;
        }

        // Direct reference to the target from the current table.
        if (columns.Contains(targetName))
        {
            return new List<string> { currentName, targetName };
        }

        // Otherwise try every entry of the current table and keep the shortest chain found.
        List<string> bestResult = null;
        foreach (string childTable in columns)
        {
            List<string> candidate = GetSortestAncestryR(childTable, targetName, maxDepth, currentDepth + 1, vistedTables);
            if (candidate != null && (bestResult == null || candidate.Count < bestResult.Count))
            {
                bestResult = candidate;
            }
        }

        if (bestResult == null)
        {
            return null;
        }

        List<string> result = new List<string>(bestResult.Count + 1);
        result.Add(currentName);
        result.AddRange(bestResult);
        return result;
    }
}
。
现在,所有这些代码只是一个(有些冗长)最短路径算法的实现,它允许源表和目标表之间的循环路径和多条路径。注意,如果两个表之间有两个深度相同的路由,算法将只选择一个(不一定是可以预测的)。
在这个源代码方法中,有一些东西让我印象深刻:
在您的 。
如上所述,您遇到了内存不足的问题。最简单的解决方法是去掉递归调用(它相当于一个隐式堆栈),并将其替换为显式的 Stack。…… 最后,我们不知道 …… 我已经设法将您的 …… 在我看来,您在查询的 …… 来避免向对象添加字段)。通常,在 DFS 算法中,这个字段对于未处理的(您以前没有看到过 ……
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
{
    // Columns are matched on their name without the first character.
    string shortName = sourceColumn.Substring(1);

    // Compare short name against short name. The previous x.Column[0].Equals(shortName)
    // compared a char with a string, which is never true, so no link was ever found.
    // NOTE(review): Color is read here from the sourceList entries but assigned below on
    // the TableSearchNode instances - confirm where the visited marker is meant to live.
    IEnumerable<TableSearchNode> tables =
        sourceList.Where(x => x.Column.Substring(1).Equals(shortName) &&
                              x.Color == White)
                  .Select(x => new TableSearchNode
                  {
                      Table = x.Table,
                      Column = x.Column,
                      Level = parentNode.Level + 1
                  });

    foreach (TableSearchNode table in tables)
    {
        parentNode.AddChildNode(sourceColumn, table);

        // Grey while the node is being explored, Black once it is finished.
        table.Color = Grey;
        if (!table.Table.Equals(targetTable))
        {
            FindLinkingTables(sourceList, table, targetTable, maxSearchDepth);
        }
        else
        {
            table.NotifySeachResult(true);
        }
        table.Color = Black;
    }
}
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
{
    // Explicit stack of nodes still to be expanded, replacing the recursive calls.
    Stack<TableSearchNode> stack = new Stack<TableSearchNode>();
    stack.Push(root);

    // Run until nothing is pending. The depth limit is enforced per node through Level;
    // stack.Count is the number of pending nodes, not the depth, so using it as the
    // limit stopped the search as soon as a node had a few children.
    while (stack.Count > 0)
    {
        TableSearchNode current = stack.Pop();

        if (current.Level < maxSearchDepth)
        {
            var tableColumns = sourceList.Where(x => x.Table.Equals(current.Table))
                                         .Select(x => x.Column);

            foreach (string sourceColumn in tableColumns)
            {
                string shortName = sourceColumn.Substring(1);

                // Compare short name against short name. The previous
                // x.Column[0].Equals(shortName) compared a char with a string and was
                // therefore always false.
                // NOTE(review): Color is read from the sourceList entries but assigned on
                // the TableSearchNode instances - confirm where the marker is meant to live.
                IEnumerable<TableSearchNode> tables =
                    sourceList.Where(x => x.Column.Substring(1).Equals(shortName) &&
                                          x.Color == White)
                              .Select(x => new TableSearchNode
                              {
                                  Table = x.Table,
                                  Column = x.Column,
                                  Level = current.Level + 1
                              });

                foreach (TableSearchNode table in tables)
                {
                    current.AddChildNode(sourceColumn, table);
                    if (!table.Table.Equals(targetTable))
                    {
                        // Grey: discovered and waiting on the stack.
                        table.Color = Grey;
                        stack.Push(table);
                    }
                    else
                    {
                        // you could go ahead and construct the ancestry list here using the stack
                        table.NotifySeachResult(true);
                        return;
                    }
                }
            }
        }

        // Black: this node has been fully handled.
        current.Color = Black;
    }
}
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
List<TableColumns> sourceList, TableSearchNode parentNode,
string targetTable, int maxSearchDepth)
{
    // Depth limit reached: leave this node unexpanded.
    if (parentNode.Level >= maxSearchDepth)
    {
        return;
    }

    // Columns of the table currently being expanded.
    var sames = sourceList.Where(w => w.Table == parentNode.Table);

    // Read the ancestor list once; it does not change while this node is expanded.
    var ancestry = parentNode.Ancenstory;

    // Join the parent's columns against the whole list on the column name minus its
    // first character. Joining `sames` with itself could only ever return the parent's
    // own table, so the search never left that table. The child node is built from the
    // matched entry (y); the parent's column (x) is kept as the reference column.
    var query =
        from x in sames
        join y in sourceList
            on x.Column.Substring(1) equals y.Column.Substring(1)
        where y.Table != parentNode.Table && !ancestry.Contains(y.Table)
        select new
        {
            SourceColumn = x.Column,
            Node = new TableSearchNode
            {
                Table = y.Table,
                Column = y.Column,
                Level = parentNode.Level + 1
            }
        };

    foreach (var link in query)
    {
        TableSearchNode z = link.Node;
        parentNode.AddChildNode(link.SourceColumn, z);

        if (z.Table != targetTable)
        {
            FindLinkingTables(sourceList, z, targetTable, maxSearchDepth);
        }
        else
        {
            z.NotifySeachResult(true);
        }
    }
}