# Trie.py

class TrieNode:
    """A single node of the URL trie.

    Each node owns a mapping to its child nodes, a mapping of timestamp to
    payload, and a flag telling whether a URL ends at this node.
    """

    def __init__(self):
        # Child nodes, keyed by the path segment that leads to them.
        self.children = {}
        # Payloads stored on this node, keyed by timestamp.
        self.data = {}
        # Starts False; Trie.insert sets it to True on the last node of a URL.
        self.isEndOfUrl = False

    def extract(self, startTimestamp, endTimeStamp):
        """Build a timestamp-filtered copy of the subtree rooted here.

        Every node of the subtree is re-created as a new ``TrieNode`` (so
        the shape of the tree and each ``isEndOfUrl`` flag are kept as they
        are), but a copied node's ``data`` only contains the entries whose
        timestamp key satisfies
        ``startTimestamp <= key <= endTimeStamp`` (both bounds inclusive).
        Payload values are not copied; the new dicts reference the same
        payload objects.  The original subtree is left unmodified.

        Args:
            startTimestamp: Lower bound of the accepted timestamp range.
            endTimeStamp: Upper bound of the accepted timestamp range.

        Returns:
            The new ``TrieNode`` that is the root of the filtered copy.
        """
        clone = TrieNode()
        clone.isEndOfUrl = self.isEndOfUrl
        clone.data = {
            stamp: payload
            for stamp, payload in self.data.items()
            if stamp <= endTimeStamp and stamp >= startTimestamp
        }
        # Children are always kept, even when filtering leaves them empty.
        clone.children = {
            segment: subtree.extract(startTimestamp, endTimeStamp)
            for segment, subtree in self.children.items()
        }
        return clone

class Trie:
    """Trie keyed by URL path segments, holding timestamped payloads."""

    def __init__(self):
        # Entry node; URLs without any non-empty segment are stored here.
        self.root = self.getNode()

    def getNode(self):
        """Create and return a new, empty ``TrieNode``."""
        return TrieNode()

    def insert(self, url, timestamp, payload):
        """Record ``payload`` under ``timestamp`` on the node for ``url``.

        ``url`` is split on ``'/'``.  Element 0 of the split is always
        ignored (it is the empty string when ``url`` starts with ``'/'``),
        and so is every empty segment.  Nodes missing along the path are
        created.  The final node is given ``data[timestamp] = payload``,
        replacing any entry already stored for that timestamp, and its
        ``isEndOfUrl`` flag is set.

        Args:
            url: Slash-separated path, e.g. ``'/a/b.html'``.
            timestamp: Key under which the payload is stored on the node.
            payload: Value to store.
        """
        node = self.root
        for segment in url.split('/')[1:]:
            if not segment:
                continue
            if segment not in node.children:
                node.children[segment] = TrieNode()
            node = node.children[segment]
        node.data[timestamp] = payload
        node.isEndOfUrl = True

    def extract(self, startTimestamp, endTimeStamp):
        """Return the root node's ``extract`` result for the given range.

        Both arguments are forwarded unchanged to ``self.root.extract``;
        the value returned is a node, not a ``Trie``.
        """
        root = self.root
        return root.extract(startTimestamp, endTimeStamp)

    def comparison(self, tree1, tree2):
        """Placeholder for comparing two trees.

        Not implemented yet: both arguments are ignored and the method
        only prints an empty line.
        """
        print()


def _find_node(trie, url):
    """Return the node that ``url`` leads to in ``trie``, or None if absent."""
    node = trie.root
    # Same segment rule as Trie.insert: element 0 of the split and empty
    # segments are ignored.
    for segment in url.split('/')[1:]:
        if not segment:
            continue
        node = node.children.get(segment)
        if node is None:
            return None
    return node


def main():
    """Small demo: insert a few URLs, then report whether each is stored.

    For every key one line ``"<url> ---- [<found>]"`` is printed, where
    ``<found>`` is True when the URL's node exists and is marked as the end
    of an inserted URL.
    """
    keys = ['/spotlight/impact/2014-11-24-master/naturalists.html', '/']

    # Trie object
    t = Trie()

    # Construct trie.  Trie.insert requires a timestamp and a payload, so
    # the demo uses the 1-based position as timestamp and the key itself as
    # payload.
    for timestamp, key in enumerate(keys, start=1):
        t.insert(key, timestamp, key)

    # Search for the inserted keys.  Trie has no search method, so the
    # lookup walks the nodes directly.
    for key in keys:
        node = _find_node(t, key)
        found = node is not None and node.isEndOfUrl
        print("{} ---- {}".format(key, [found]))


# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()