Newer
Older
class TrieNode:
def __init__(self):
self.children = {}
self.data = {}
self.isEndOfUrl = False
def extract(self, startTimestamp , endTimeStamp):
pCrawl = self
pCrawlCopy = TrieNode()
pCrawlCopy.isEndOfUrl = pCrawl.isEndOfUrl
for data in pCrawl.data:
if data <= endTimeStamp and data >= startTimestamp:
pCrawlCopy.data[data] = pCrawl.data[data]
for child in pCrawl.children:
pCrawlJunior = pCrawl.children[child]
pCrawlCopy.children[child] = pCrawlJunior.extract(startTimestamp, endTimeStamp)
return pCrawlCopy
class Trie:
def __init__(self):
self.root = self.getNode()
def getNode(self):
# Returns new trie node (initialized to NULLs)
return TrieNode()
def insert(self, url, timestamp, payload):
urlSplit = url.split('/')
pCrawl = self.root
# for level in urlSplit:
for i in range(1, len(urlSplit)):
# if current character is not present
level = urlSplit[i]
if len(level) == 0: continue
if pCrawl.children.__contains__(level):
pCrawl = pCrawl.children[level];
else:
pCrawl.children[level] = TrieNode()
pCrawl = pCrawl.children[level]
pCrawl.data[timestamp] = payload;
pCrawl.isEndOfUrl = True
def extract(self, startTimestamp , endTimeStamp):
# extract tree based on given timestamp
return self.root.extract(startTimestamp, endTimeStamp)
def comparison(self, tree1, tree2):
print()
# compare two trees
def main():
keys = ['/spotlight/impact/2014-11-24-master/naturalists.html', '/']
# Trie object
t = Trie()
# Construct trie
for key in keys:
t.insert(key)
# Search for different keys
print("{} ---- {}".format("/spotlight/impact/2014-11-24-master/naturalists.html", [t.search("/spotlight/impact/2014-11-24-master/naturalists.html")]))
print("{} ---- {}".format("/", [t.search("/")]))
if __name__ == '__main__':
main()