Trie.py

class TrieNode:
    def __init__(self):
        self.children = {}
        self.data = {}
        self.isEndOfUrl = False

class Trie:

    def __init__(self):
        self.root = self.getNode()

    def getNode(self):
        # Returns new trie node (initialized to NULLs)
        return TrieNode()

    def insert(self, url, timestamp, payload):
        urlSplit = url.split('/')
        pCrawl = self.root
        # for level in urlSplit:
        for i in range(1, len(urlSplit)):
            # if current character is not present
            level = urlSplit[i]
            if len(level) == 0: continue
            if pCrawl.children.__contains__(level):
                pCrawl = pCrawl.children[level];
            else:
                pCrawl.children[level] = TrieNode()
                pCrawl = pCrawl.children[level]
        pCrawl.data[timestamp] = payload;
        pCrawl.isEndOfUrl = True

    def extract(self, startTimestamp , endTimeStamp):
        print()
        # extract tree based on given timestamp
        # pCrawl = self.root
        # for child in pCrawl.children:
        #     print(child)

    def comparison(self, tree1, tree2):
        print()
        # compare two trees


def main():
    keys = ['/spotlight/impact/2014-11-24-master/naturalists.html', '/']

    # Trie object
    t = Trie()

    # Construct trie
    for key in keys:
        t.insert(key)

    # Search for different keys
    print("{} ---- {}".format("/spotlight/impact/2014-11-24-master/naturalists.html", [t.search("/spotlight/impact/2014-11-24-master/naturalists.html")]))
    print("{} ---- {}".format("/", [t.search("/")]))


if __name__ == '__main__':
    main()