class TrieNode: def __init__(self): self.children = {} self.data = {} self.name = '' self.isEndOfUrl = False def extract(self, startTimestamp , endTimeStamp): pCrawl = self pCrawlCopy = TrieNode() pCrawlCopy.isEndOfUrl = pCrawl.isEndOfUrl for data in pCrawl.data: if data <= endTimeStamp and data >= startTimestamp: pCrawlCopy.data[data] = pCrawl.data[data] for child in pCrawl.children: pCrawlJunior = pCrawl.children[child] pCrawlCopy.children[child] = pCrawlJunior.extract(startTimestamp, endTimeStamp) return pCrawlCopy class Trie: def __init__(self): self.root = self.getNode() self.matrixElements = {} self.counter = 0 def getNode(self): # Returns new trie node (initialized to NULLs) return TrieNode() def isStructureChange(self, url): urlSplit = url.split('/') pCrawl = self.root isnewpath = False # for level in urlSplit: for i in range(1, len(urlSplit)): # if current character is not present level = urlSplit[i] if len(level) == 0: continue if pCrawl.children.__contains__(level): pCrawl = pCrawl.children[level]; else: isnewpath = True break return isnewpath def insert(self, url, timestamp, payload): newNodePath = '' urlSplit = url.split('/') pCrawl = self.root isnewpath = False # for level in urlSplit: for i in range(1, len(urlSplit)): # if current character is not present level = urlSplit[i] if len(level) == 0: continue if not self.matrixElements.__contains__(level): self.counter = self.counter+1 self.matrixElements[level] = self.counter if pCrawl.children.__contains__(level): pCrawl = pCrawl.children[level]; else: newNodePath = newNodePath+level+'/' pCrawl.children[level] = TrieNode() pCrawl = pCrawl.children[level] pCrawl.name = level pCrawl.data[timestamp] = payload; pCrawl.isEndOfUrl = True if(newNodePath!=''): newNodePath =newNodePath[:-1] isnewpath = True return (isnewpath,newNodePath) def extractNodeData(self, url): newNodePath = '' urlSplit = url.split('/') pCrawl = self.root # for level in urlSplit: for i in range(1, len(urlSplit)): # if current character is not present level = urlSplit[i] if len(level) == 0: continue pCrawl = pCrawl.children[level]; return pCrawl.data def extract(self, startTimestamp , endTimeStamp): # extract tree based on given timestamp if startTimestamp == None or len(startTimestamp.strip())==0: startTimestamp = "0" if endTimeStamp == None or len(endTimeStamp.strip())==0: import sys endTimeStamp = str(sys.maxsize) trieCopy = Trie() trieCopy.counter = self.counter trieCopy.matrixElements = self.matrixElements trieCopy.root = self.root.extract(startTimestamp, endTimeStamp) return trieCopy def isSame(self, tree1): # compare two trees from collections import deque stack_tree2 = deque() stack_tree1 = deque() stack_tree2.append(self.root) stack_tree1.append(tree1) while(len(stack_tree2)!=0): tree2 = stack_tree2.pop() tree = stack_tree1.pop() for data in tree2.data: if tree.data.__contains__(data): if not tree.data[data] == tree2.data[data]: return False else: return False for child in tree2.children: if tree.children.__contains__(child): if not stack_tree2.__contains__(child): stack_tree2.append(tree2.children[child]) stack_tree1.append(tree.children[child]) else: return False if(len(stack_tree1)!=0): return False return True def ancestorMatrixRec(self, node, anc, mat): # base case if node == None: return mat import numpy as np mat = np.asarray(mat) # Update all ancestors of current node data_node = self.matrixElements[node.name] for i in anc: mat[self.matrixElements[i]][data_node] = 1 # Push data to list of ancestors anc.append(node.name) # Traverse left and right subtrees for child in node.children: pCrawlJunior = node.children[child] mat = self.ancestorMatrixRec(pCrawlJunior, anc, mat) # Remove data from list the list of ancestors # as all descendants of it are processed now. anc.pop(-1) return mat # This function mainly calls ancestorMatrixRec() def ancestorMatrix(self): # Create an empty ancestor array anc = [] # rows, cols = (len(self.matrixElements), len(self.matrixElements)) # mat = [[0] * cols] * rows import numpy as np mat = np.zeros((len(self.matrixElements), len(self.matrixElements)),dtype=int) # Fill ancestor matrix and find return self.ancestorMatrixRec(self.root, anc, mat)