class TrieNode: def __init__(self): self.children = {} self.data = {} self.isEndOfUrl = False def extract(self, startTimestamp , endTimeStamp): pCrawl = self pCrawlCopy = TrieNode() pCrawlCopy.isEndOfUrl = pCrawl.isEndOfUrl for data in pCrawl.data: if data <= endTimeStamp and data >= startTimestamp: pCrawlCopy.data[data] = pCrawl.data[data] for child in pCrawl.children: pCrawlJunior = pCrawl.children[child] pCrawlCopy.children[child] = pCrawlJunior.extract(startTimestamp, endTimeStamp) return pCrawlCopy class Trie: def __init__(self): self.root = self.getNode() def getNode(self): # Returns new trie node (initialized to NULLs) return TrieNode() def insert(self, url, timestamp, payload): urlSplit = url.split('/') pCrawl = self.root # for level in urlSplit: for i in range(1, len(urlSplit)): # if current character is not present level = urlSplit[i] if len(level) == 0: continue if pCrawl.children.__contains__(level): pCrawl = pCrawl.children[level]; else: pCrawl.children[level] = TrieNode() pCrawl = pCrawl.children[level] pCrawl.data[timestamp] = payload; pCrawl.isEndOfUrl = True def extract(self, startTimestamp , endTimeStamp): # extract tree based on given timestamp return self.root.extract(startTimestamp, endTimeStamp) def comparison(self, tree1): # compare two trees from collections import deque stack_tree2 = deque() stack_tree1 = deque() stack_tree2.append(self.root) stack_tree1.append(tree1) while (len(stack_tree2) != 0): tree2 = stack_tree2.pop() tree = stack_tree1.pop() for data in tree2.data: if tree.data.__contains__(data): if not tree.data[data] == tree2.data[data]: return False else: return False for child in tree2.children: if tree.children.__contains__(child): if not stack_tree2.__contains__(child): stack_tree2.append(tree2.children[child]) stack_tree1.append(tree.children[child]) else: return False if (len(stack_tree1) != 0): return False return True def main(): keys = ['/spotlight/impact/2014-11-24-master/naturalists.html', '/'] # Trie object t = Trie() # Construct trie for key in keys: t.insert(key) # Search for different keys print("{} ---- {}".format("/spotlight/impact/2014-11-24-master/naturalists.html", [t.search("/spotlight/impact/2014-11-24-master/naturalists.html")])) print("{} ---- {}".format("/", [t.search("/")])) if __name__ == '__main__': main()