Skip to content
Snippets Groups Projects
Forked from xw0078 / CS-6604-WebArchive
11 commits behind the upstream repository.
Trie.py 2.99 KiB
class TrieNode:
    def __init__(self):
        self.children = {}
        self.data = {}
        self.isEndOfUrl = False

    def extract(self, startTimestamp , endTimeStamp):
        pCrawl = self
        pCrawlCopy = TrieNode()
        pCrawlCopy.isEndOfUrl = pCrawl.isEndOfUrl
        for data in pCrawl.data:
            if data <= endTimeStamp and data >= startTimestamp:
                pCrawlCopy.data[data] = pCrawl.data[data]
        for child in pCrawl.children:
            pCrawlJunior = pCrawl.children[child]
            pCrawlCopy.children[child] = pCrawlJunior.extract(startTimestamp, endTimeStamp)
        return pCrawlCopy

class Trie:

    def __init__(self):
        self.root = self.getNode()

    def getNode(self):
        # Returns new trie node (initialized to NULLs)
        return TrieNode()

    def insert(self, url, timestamp, payload):
        urlSplit = url.split('/')
        pCrawl = self.root
        # for level in urlSplit:
        for i in range(1, len(urlSplit)):
            # if current character is not present
            level = urlSplit[i]
            if len(level) == 0: continue
            if pCrawl.children.__contains__(level):
                pCrawl = pCrawl.children[level];
            else:
                pCrawl.children[level] = TrieNode()
                pCrawl = pCrawl.children[level]
        pCrawl.data[timestamp] = payload;
        pCrawl.isEndOfUrl = True

    def extract(self, startTimestamp , endTimeStamp):
        # extract tree based on given timestamp
        return self.root.extract(startTimestamp, endTimeStamp)

    def comparison(self, tree1):
        # compare two trees
        from collections import deque
        stack_tree2 = deque()
        stack_tree1 = deque()
        stack_tree2.append(self.root)
        stack_tree1.append(tree1)

        while (len(stack_tree2) != 0):
            tree2 = stack_tree2.pop()
            tree = stack_tree1.pop()
            for data in tree2.data:
                if tree.data.__contains__(data):
                    if not tree.data[data] == tree2.data[data]:
                        return False
                else:
                    return False

            for child in tree2.children:
                if tree.children.__contains__(child):
                    if not stack_tree2.__contains__(child):
                        stack_tree2.append(tree2.children[child])
                        stack_tree1.append(tree.children[child])
                else:
                    return False
        if (len(stack_tree1) != 0):
            return False
        return True


def main():
    keys = ['/spotlight/impact/2014-11-24-master/naturalists.html', '/']

    # Trie object
    t = Trie()

    # Construct trie
    for key in keys:
        t.insert(key)

    # Search for different keys
    print("{} ---- {}".format("/spotlight/impact/2014-11-24-master/naturalists.html", [t.search("/spotlight/impact/2014-11-24-master/naturalists.html")]))
    print("{} ---- {}".format("/", [t.search("/")]))


if __name__ == '__main__':
    main()