Skip to content
Snippets Groups Projects
Commit 7794c844 authored by Ritesh Bansal
Browse files

added extract tree based on timestamp

parent d97aa3a9
No related branches found
No related tags found
1 merge request: !1 Site structure
......@@ -4,6 +4,18 @@ class TrieNode:
self.data = {}
self.isEndOfUrl = False
def extract(self, startTimestamp, endTimeStamp):
    """Return a filtered copy of the subtree rooted at this node.

    A fresh ``TrieNode`` is built for this node and, recursively, for
    every child. Each copy keeps the ``isEndOfUrl`` flag of its source
    node, but only those ``data`` entries whose key lies in the
    inclusive range ``[startTimestamp, endTimeStamp]``. Values are
    carried over by reference, not duplicated.

    Every child is copied, including children that end up with no
    ``data`` entries inside the window, so the shape of the tree is
    preserved.

    Args:
        startTimestamp: lower bound of the window (inclusive).
        endTimeStamp: upper bound of the window (inclusive).

    Returns:
        The new ``TrieNode`` that mirrors this node.
    """
    clone = TrieNode()
    clone.isEndOfUrl = self.isEndOfUrl

    # Keep only the entries whose key falls inside the window.
    clone.data.update(
        (stamp, payload)
        for stamp, payload in self.data.items()
        if startTimestamp <= stamp <= endTimeStamp
    )

    # Recurse so that each child is filtered with the same bounds.
    for name, subtree in self.children.items():
        clone.children[name] = subtree.extract(startTimestamp, endTimeStamp)

    return clone
class Trie:
def __init__(self):
......@@ -30,11 +42,8 @@ class Trie:
pCrawl.isEndOfUrl = True
def extract(self, startTimestamp, endTimeStamp):
    """Extract a copy of this trie limited to a timestamp window.

    The work is delegated to ``self.root.extract``; both bounds are
    passed through unchanged and its result is returned as-is.

    Args:
        startTimestamp: lower bound of the window.
        endTimeStamp: upper bound of the window.

    Returns:
        Whatever ``self.root.extract(startTimestamp, endTimeStamp)``
        returns (presumably the root of the filtered copy -- confirm
        against the root node's class).
    """
    # No console output here: extraction is a pure query, so the
    # placeholder print() and the commented-out scaffolding are gone.
    return self.root.extract(startTimestamp, endTimeStamp)
def comparison(self, tree1, tree2):
print()
......
......@@ -45,20 +45,12 @@ for dayData in data_train:
payload = dayDataNP[i][9]
sitemap.insert(parsedurl.path, timestamp, payload)
# if not(sitemapURLS.__contains__(parsedurl.path)):
# sitemapURLS[parsedurl.path] = parsedurl[1]+parsedurl[2]
# sitemap.insert(parsedurl.path, timestamp, payload)
# Fetch the entry stored under the 'www.vt.edu' key.
# NOTE(review): sitemapURLS is populated outside this view; presumably it
# maps a host name to a Trie -- confirm against the code that fills it.
vtTree = sitemapURLS['www.vt.edu']
# Ask that entry for a copy limited to the window between the two bounds.
# The bounds are 14-character digit strings (they look like YYYYMMDDhhmmss
# -- TODO confirm), so any comparison against them is a string comparison.
vtTreeCopy = vtTree.extract('20140906125541','20141215204723')
# Progress marker written to stdout once the extraction call has returned.
print('done')
# if not(sitemapURLS.__contains__(parsedurl.path)):
# sitemapURLS[parsedurl.path] = parsedurl[1]+parsedurl[2]
# sitemap.insert(parsedurl.path, timestamp, payload)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment