From 32cda43509de7b8e5e98b76a1635c967470e4239 Mon Sep 17 00:00:00 2001
From: Ritesh Bansal <riteshobansal@gmail.com>
Date: Tue, 3 Dec 2019 11:45:21 -0500
Subject: [PATCH] Trie: add isStructureChange/extractNodeData, default extract() bounds, rename comparison to isSame

---
 CNN_1hour2levelMainNew.py |  6 +++---
 Trie.py                   | 38 ++++++++++++++++++++++++++++++++++++--
 2 files changed, 39 insertions(+), 5 deletions(-)

diff --git a/CNN_1hour2levelMainNew.py b/CNN_1hour2levelMainNew.py
index 5bebb40..825f05e 100644
--- a/CNN_1hour2levelMainNew.py
+++ b/CNN_1hour2levelMainNew.py
@@ -20,10 +20,10 @@ sitemapdomains = cnnFocusCrawl.makingSitemapTree(data_train)
 sitemapdomains = cnnFocusCrawl.testingSitemapTreeClassiyRF(sitemapdomains, data_test)
 
 edition_cnn_com = sitemapdomains['www.cnn.com']
-edition_cnn_com_Copy = edition_cnn_com.extract()
-result  = edition_cnn_com.comparison(edition_cnn_com_Copy.root)
+edition_cnn_com_Copy = edition_cnn_com.extract("","")
+result  = edition_cnn_com.isSame(edition_cnn_com_Copy.root)
 print(result)
-result  = edition_cnn_com.comparison(edition_cnn_com.root)
+result  = edition_cnn_com.isSame(edition_cnn_com.root)
 print(result)
 matrix = edition_cnn_com.ancestorMatrix()
 matrix = np.asarray(matrix)
diff --git a/Trie.py b/Trie.py
index 118f4c5..b5dbb73 100644
--- a/Trie.py
+++ b/Trie.py
@@ -27,6 +27,22 @@ class Trie:
         # Returns new trie node (initialized to NULLs)
         return TrieNode()
 
+    def isStructureChange(self, url):
+        """Return True if `url` contains a path level missing from the trie.
+
+        The first '/'-separated segment of `url` and any empty segments are
+        skipped. The trie is only read, never modified.
+        """
+        pCrawl = self.root
+        for level in url.split('/')[1:]:
+            if not level:
+                continue
+            if level not in pCrawl.children:
+                # First unknown level: the url does not fit the current tree.
+                return True
+            pCrawl = pCrawl.children[level]
+        return False
+
     def insert(self, url, timestamp, payload):
         newNodePath = ''
         urlSplit = url.split('/')
@@ -54,15 +70,34 @@ class Trie:
             isnewpath = True
         return (isnewpath,newNodePath)
 
+    def extractNodeData(self, url):
+        """Return the `data` of the trie node addressed by `url`.
+
+        The first '/'-separated segment and empty segments are skipped, so
+        a url with no other segments yields the root's data. A level that
+        is missing from `children` propagates the lookup error (KeyError
+        if `children` is a dict -- its type is not visible here).
+        """
+        pCrawl = self.root
+        for level in url.split('/')[1:]:
+            if level:
+                pCrawl = pCrawl.children[level]
+        return pCrawl.data
+
     def extract(self, startTimestamp , endTimeStamp):
         # extract tree based on given timestamp
+        if startTimestamp is None or not startTimestamp.strip():
+            startTimestamp = "0"  # fallback when start is None or blank
+        if endTimeStamp is None or not endTimeStamp.strip():
+            import sys
+            endTimeStamp = str(sys.maxsize)  # fallback when end is None or blank
         trieCopy = Trie()
         trieCopy.counter = self.counter
         trieCopy.matrixElements = self.matrixElements
         trieCopy.root = self.root.extract(startTimestamp, endTimeStamp)
         return trieCopy
 
-    def comparison(self, tree1):
+    def isSame(self, tree1):
         # compare two trees
         from collections import deque
         stack_tree2 = deque()
@@ -109,7 +144,6 @@ class Trie:
         anc.append(node.name)
 
         # Traverse left and right subtrees
-
         for child in node.children:
             pCrawlJunior = node.children[child]
             mat = self.ancestorMatrixRec(pCrawlJunior, anc, mat)
-- 
GitLab