Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
C
CS_4624_Project
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Container Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
nickgrifasi
CS_4624_Project
Commits
8fb12687
Commit
8fb12687
authored
2 years ago
by
Anoushka Deshmukh
Browse files
Options
Downloads
Patches
Plain Diff
script to translate raw data to sessions
parent
1312af7f
No related branches found
No related tags found
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
abstracting_script.py
+328
-0
328 additions, 0 deletions
abstracting_script.py
with
328 additions
and
0 deletions
abstracting_script.py
0 → 100644
+
328
−
0
View file @
8fb12687
import
os
import
pandas
as
pd
import
csv
import
re
from
datetime
import
datetime
as
dt
,
timedelta
from
collections
import
defaultdict
import
time
# Running totals. abstract() declares all three as globals; in this file it
# only ever reassigns the count and the time.
avg_session_time = 0
total_session_count = 0
total_session_time = 0

# Raw event names grouped by the category writeEvName() reports for them.
document_event = ["document-ready"]

window_event = [
    "window-unload",
    "window-blur",
    "window-focus",
]

# Names that check_pe_helper() accepts. Some names appear more than once;
# the repeats are kept so the list contents and length stay unchanged.
pe_event = [
    "jsav-matrix-click",
    "jsav-exercise-grade",
    "jsav-exercise-reset",
    "jsav-node-click",
    "button-identifybutton",
    "button-editbutton",
    "button-addrowbutton",
    "button-deletebutton",
    "button-setterminalbutton",
    "button-addchildbutton",
    "button-checkbutton",
    "button-autobutton",
    "button-donebutton",
    "submit-helpbutton",
    "submit-edgeButton",
    "submit-deleteButton",
    "submit-undoButton",
    "submit-redoButton",
    "submit-editButton",
    "submit-nodeButton",
    "submit-begin",
    "submit-finish",
    "button-hintbutton",
    "button-movebutton",
    "button-removetreenodebutton",
    "button-savefile",
    "button-edgebutton",
    "jsav-exercise-model-end",
    "jsav-exercise-model-begin",
    "jsav-array-click",
    "jsav-exercise-gradeable-step",
    "jsav-exercise-grade",
    "jsav-exercise-model-open",
    "jsav-exercise-model-forward",
    "jsav-exercise-model-close",
    "jsav-node-click",
    "jsav-exercise-grade-change",
    "jsav-exercise-reset",
    "jsav-exercise-step-fixed",
    "jsav-arraytree-click",
    "jsav-exercise-undo",
    "jsav-exercise-model-backward",
    "jsav-exercise-model-begin",
    "jsav-exercise-step-undone",
    "jsav-exercise-model-end",
    "odsa-award-credit",
    "odsa-exercise-init",
    "button-classify",
    "button-throwRoll",
    "button-calculate",
    "button-decrement",
    "button-help",
    "button-selecting",
    "button-sorting",
    "button-incrementing",
    "button-run",
    "button-partition",
    "button-markSorted",
    "button-reset",
    "button-outputbuffer",
    "button-noaction",
    "button-submit",
    "button-insert",
    "button-remove",
    "button-next",
    "button-about",
    "button-undir",
    "button-dir",
    "button-clear",
    "button-read",
    "button-write",
    "button-restart",
]

ff_event = [
    "jsav-begin",
    "jsav-end",
    "jsav-forward",
    "jsav-backward",
]

# No function in this file reads other_event: writeEvName() reaches
# "Other event" through its final else. "odsa-exercise-init" is also listed
# in pe_event, which writeEvName() checks, so it is reported as a PE event.
other_event = [
    "hyperlink",
    "jsav-narration-on",
    "jsav-narration-off",
    "button-layoutRef",
    "odsa-exercise-init",
]
# Reads a raw interaction data file
def readfile(file_name):
    """Load ``./data/<file_name>/<file_name>_sorted.csv`` into the global ``csvdata``.

    The frame is sorted by ``user_id`` and then ``action_time`` before it is
    stored, so consecutive rows belong to the same user in time order.

    Side effects:
        * Changes the process working directory to ``./data/<file_name>``.
          abstract() opens its output file with a relative path afterwards,
          so the output lands in that directory. Because the path is
          relative, a second call from the new directory looks for a nested
          ``./data/...`` folder.
        * Rebinds the module-level ``csvdata``.
        * Prints a progress line and, when the file has at least two rows,
          the second row of the frame as read (before sorting).

    Args:
        file_name: Data set name; used for both the directory and the CSV stem.

    Returns:
        The sorted DataFrame (the same object stored in ``csvdata``).
    """
    global csvdata

    os.chdir("./data/" + file_name)
    print("Reading " + file_name + " data")

    df = pd.read_csv(file_name + "_sorted.csv")
    csvdata = df.sort_values(['user_id', 'action_time'])

    # The preview row is diagnostic only; a file with fewer than two rows
    # must not abort the load with an IndexError.
    if len(df) > 1:
        print(df.iloc[1])

    return csvdata
# convert time in a specific format to display
def GetTime(seconds):
    """Format a duration in seconds as ``"D days H hours M minutes S seconds"``.

    The value is truncated to whole seconds with ``int()`` first. The parts
    are taken from a ``timedelta`` directly, so the day count stays correct
    for durations of a month or longer. Reading the day-of-month from a
    calendar date would wrap back to 0 after 31 days.

    Args:
        seconds: Duration in seconds; anything ``int()`` accepts.

    Returns:
        The formatted duration string. Negative durations follow
        ``timedelta`` normalisation (negative days, non-negative remainder).
    """
    span = timedelta(seconds=int(seconds))
    hours, remainder = divmod(span.seconds, 3600)
    minutes, secs = divmod(remainder, 60)
    return "%d days %d hours %d minutes %d seconds" % (span.days, hours, minutes, secs)
# Helper: map a raw event name onto its category label
def writeEvName(row):
    """Return the category label for ``row['name']``.

    The module-level lists are checked in a fixed order (document, window,
    FF, PE) and the first list containing the name decides the label. A name
    found in none of them is labelled ``"Other event"``.
    """
    event = row['name']
    labelled_groups = (
        (document_event, "document event"),
        (window_event, "window event"),
        (ff_event, "FF event"),
        (pe_event, "PE event"),
    )
    for members, label in labelled_groups:
        if event in members:
            return label
    return "Other event"
# Helper function to write a description for events
def writeDesc(row):
    """Build the "Event Description" text for one event row.

    Rules, in order:
        1. ``ex_type == "pe"``       -> ``"Attempted to solve PE"``.
        2. ``inst_section_id`` null  -> the row's own ``description``.
        3. otherwise                 -> ``"Attempted to solve <short_name> frame"``
           when ``short_name`` is truthy, else ``"... exercise"``.

    ``short_name`` is read by column label. A bare integer index on a
    label-indexed Series is a deprecated positional lookup in pandas and only
    works if ``short_name`` is the tenth column.

    Args:
        row: Mapping-like event record (a pandas Series in abstract()) with
            the keys ``ex_type``, ``inst_section_id``, ``description`` and
            ``short_name``.

    Returns:
        The description string (or the row's ``description`` value as-is).
    """
    if row['ex_type'] == "pe":
        return "Attempted to solve PE"

    if pd.isnull(row['inst_section_id']):
        return row['description']

    short_name = row['short_name']
    # NOTE(review): a missing short_name read from CSV is NaN, which is
    # truthy, so such rows take the "frame" branch. Confirm that is intended.
    if short_name:
        return f'Attempted to solve {short_name} frame'
    return f'Attempted to solve {short_name} exercise'
# Helper: render the elapsed-time cell for a (possibly merged) event row
def writeTime(row, start, end):
    """Describe how long the span ``start`` to ``end`` lasted for ``row``.

    * PE rows (description ``"PE"``, ex_type ``"pe"``, or a name accepted by
      check_pe_helper) and FF rows (name in ``ff_event``) get
      ``"<secs> seconds"``.
    * Rows whose name contains ``"document"`` get ``None``.
    * Everything else gets ``"In slideshow for ..."`` when ``short_name`` is
      truthy, or ``"In exercise for ..."`` otherwise. A span that is not
      longer than zero seconds gives ``None``.
    """
    plain_duration = (
        row['description'] == "PE"
        or row['ex_type'] == "pe"
        or check_pe_helper(row['name'])
        or row['name'] in ff_event
    )
    if plain_duration:
        return f'{(end - start).total_seconds()} seconds'

    if "document" in row['name']:
        return None

    if row['short_name']:
        elapsed = (end - start).total_seconds()
        if not elapsed > 0:
            return None
        return f'In slideshow for {elapsed} seconds'

    elapsed = (end - start).total_seconds()
    if not elapsed > 0:
        return None
    return f'In exercise for {elapsed} seconds'
# Membership test: is this event name one of the PE event names?
def check_pe_helper(command):
    """Return True when ``command`` is listed in ``pe_event``, else False."""
    return command in pe_event
# Check whether this and next events are the same type of event (PE)
def bundle_pe(curr, next):
    """Decide whether ``curr`` and ``next`` should be merged into one PE row.

    Both arguments are event records indexed by column name (``ex_type``,
    ``description``, ``name``).

    Decision table as implemented:
      * ``curr`` has a non-null ``ex_type`` or its description is ``'PE'``:
          - ``next`` has a non-null ``ex_type``: False only when neither
            ``ex_type`` is ``'pe'``; otherwise True.
          - ``next`` has a null ``ex_type``: False when ``next['name']`` is
            not a PE event name and ``next['description']`` is not ``'PE'``;
            otherwise True.
      * otherwise (``curr`` has null ``ex_type`` and is not described as
        ``'PE'``): True only when ``curr['name']`` is a PE event name and
        ``next`` either has ``ex_type == 'pe'`` or a PE event name.

    NOTE(review): in the "both have ex_type" case a pair is bundled as soon as
    either side is ``'pe'``. Confirm that a non-PE exercise followed by a PE
    exercise is meant to be merged.
    NOTE(review): the parameter name ``next`` shadows the builtin inside this
    function; it is kept because it is part of the signature.
    """
    if not pd.isnull(curr['ex_type']) or curr['description'] == 'PE':
        # curr has value
        if not pd.isnull(next['ex_type']):
            # both curr and next has values
            if curr['ex_type'] != 'pe' and next['ex_type'] != 'pe':
                return False
        else:
            # if only curr has value
            if not check_pe_helper(next['name']) and next['description'] != 'PE':
                return False
    else:
        # curr doesn't have value
        if check_pe_helper(curr['name']):
            if next['ex_type'] == 'pe' or check_pe_helper(next['name']):
                return True
            else:
                return False
        else:
            return False
    # Reached only from the first branch when no rule rejected the pair.
    return True
# Do the current and the following event refer to the same short_name?
def bundle_ff(curr, next):
    """Return True when both rows share a non-null ``short_name``.

    A null ``short_name`` on ``curr`` always gives False, so two rows that
    both lack a short name are never bundled.
    """
    shared_name = curr['short_name']
    if pd.isnull(shared_name):
        return False
    if shared_name == next['short_name']:
        return True
    return False
# Main function to abstract a raw event data into a session data
def abstract(file_name):
    """Collapse the raw event rows of ``file_name`` into merged session rows.

    Calls readfile(file_name), which fills the global ``csvdata``, then walks
    the rows pairwise as (``curr``, next row) and writes
    ``<file_name>_merged_result_unannotated.csv`` in the current working
    directory.

    For each pair:
      * same user and gap > ``threshold`` seconds: write the pending row,
        close the session, start a new one;
      * same user and gap within threshold: merge ``curr`` into the pending
        row when the next event has the same name, or when bundle_pe() or
        bundle_ff() accepts the pair; otherwise write the pending row;
      * different user: write the pending row and reset the session number.

    Updates the globals ``total_session_count`` and ``total_session_time``.
    Returns None.

    NOTE(review): the loop only writes rows for ``curr``. Nothing is written
      after the loop, so the last row of ``csvdata`` (and any merge still
      pending there) never reaches the output, and the last session's
      duration is not added to ``total_session_time``.
    NOTE(review): ``avg_session_time`` is declared global but never assigned
      in this function.
    NOTE(review): ``csvdata.iloc[0]`` raises on an empty input frame.
    NOTE(review): the header names column 8 "Action Time" and column 9
      "Exercise Type", but the rows put the writeTime() text and
      ``short_name`` there. Confirm the intended labels.
    """
    readfile(file_name)
    print("Reading complete")
    global avg_session_time
    global total_session_count
    global total_session_time
    session_count = 0
    session_start_time = csvdata.iloc[0]['action_time']  # Initially set up the start time as the first event's action time
    session_end_time = 0
    columns = ["session", "user ID", "Inst Book", "Event name", "Event Description", "Start time", "End Time", "Action Time", "Exercise Type", "Number of events"]
    start_time = 0  # 0 means "no merged row is currently open"
    num_event = 1  # number of raw events folded into the pending row
    is_pe_exercise = False  # pending row was extended through bundle_pe()
    is_ff_exercise = False  # pending row was extended through bundle_ff()
    # Write a csv "file_name"_merged_result_unannotated.csv file
    with open(file_name + '_merged_result_unannotated.csv', 'w', newline="") as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(columns)
        # The first session is numbered here, so the `session_count == 0`
        # branch below (which also bumps total_session_count) is skipped on
        # the first iteration.
        session_count += 1
        row_iterator = csvdata.iterrows()
        # Prime the pairwise walk: `curr` trails one row behind `row`.
        _, curr = next(row_iterator)
        for i, row in row_iterator:
            # Set the threshold for the session (seconds compared against time_diff)
            threshold = 600
            # Example of sample raw interaction data
            # id user_id inst_book_id name description action_time inst_chapter_module_id inst_section_id inst_exercise_id short_name ex_type
            # 0 1 2 3 4 5 6 7 8 9 10
            # 22961032 8387 722 document-ready "User loaded module" 2020-01-21 18:43 70590
            # 23119732 3013 722 jsav-node-click {"ev_num":43} 2020-01-29 2:41 70620 109676 1095 sheet1exercise3 pe
            user_id = curr['user_id']
            book_id = curr['inst_book_id']
            ev_name = curr['name']
            ev_desc = curr['description']  # not used below
            action_time = curr['action_time']
            module_id = curr['inst_chapter_module_id']  # not used below
            section_id = curr['inst_section_id']  # not used below
            exercise_id = curr['inst_exercise_id']  # not used below
            exercise_name = curr['short_name']
            exercise_type = curr['ex_type']  # not used below
            next_ev = row
            # action_time values must be strings in exactly this format, seconds included.
            now = dt.strptime(action_time, "%Y-%m-%d %H:%M:%S")
            next_time = dt.strptime(next_ev['action_time'], "%Y-%m-%d %H:%M:%S")
            time_diff = (next_time - now).total_seconds()
            # Placeholder only: every branch below reassigns end_time before using it.
            end_time = start_time
            if start_time == 0:
                # Open a new pending row at the current event.
                start_time = action_time
            if session_count == 0:
                # session_count was reset by a user change: start numbering again at 1.
                # writer.writerow("\n")
                # writer.writerow([f'Session {session_count + 1}'])
                session_count += 1
                session_start_time = action_time
                total_session_count = total_session_count + 1
            if user_id == next_ev['user_id']:
                if (time_diff > threshold):
                    # Creating a new session
                    # For the last event of a session, we need to add one more row of event at the end of the session
                    end_time = action_time
                    start = dt.strptime(start_time, "%Y-%m-%d %H:%M:%S")
                    end = dt.strptime(end_time, "%Y-%m-%d %H:%M:%S")
                    diff = writeTime(curr, start, end)
                    # diff = f'{(end - start).total_seconds()} seconds' if (end - start).total_seconds() > 0 else None
                    if is_pe_exercise:
                        writer.writerow([session_count, user_id, book_id, writeEvName(curr), "Attempted to solve PE", start_time, end_time, diff, exercise_name, num_event])
                    elif is_ff_exercise:
                        writer.writerow([session_count, user_id, book_id, writeEvName(curr), writeDesc(curr), start_time, end_time, diff, exercise_name, num_event])
                    else:
                        writer.writerow([session_count, user_id, book_id, writeEvName(curr), writeDesc(curr), start_time, end_time, diff, exercise_name, num_event])
                    # Write a comprehensive information about a session
                    # Uncomment these lines if you want to see an annotated version of abstracted data
                    session_end_time = end
                    # writer.writerow([f'User inactive for {GetTime(time_diff)}'])
                    # writer.writerow([f'Session lasted for {session_end_time - dt.strptime(session_start_time, "%Y-%m-%d %H:%M:%S")}'])
                    # writer.writerow("\n")
                    # writer.writerow([f'Session {session_count + 1}'])
                    total_session_time = total_session_time + (session_end_time - dt.strptime(session_start_time, "%Y-%m-%d %H:%M:%S")).total_seconds()
                    total_session_count = total_session_count + 1
                    session_count += 1
                    session_start_time = next_ev['action_time']
                    # Reset the pending-row state for the next session.
                    start_time = 0
                    num_event = 1
                    is_pe_exercise = False
                    is_ff_exercise = False
                else:
                    # Retrieve all events in one session
                    # NOTE(review): this repeats the enclosing user_id test, so its
                    # else branch further down cannot run.
                    if user_id == next_ev['user_id']:
                        if ev_name == next_ev['name']:
                            # Finds duplicate events within the same session
                            end_time = action_time
                            num_event += 1
                            curr = row
                            # Keep the pending row open (start_time is left untouched).
                            continue
                        else:
                            if bundle_pe(curr, next_ev):
                                end_time = action_time
                                num_event += 1
                                is_pe_exercise = True
                                curr = row
                                continue
                            elif bundle_ff(curr, next_ev):
                                end_time = action_time
                                num_event += 1
                                is_ff_exercise = True
                                curr = row
                                continue
                            else:
                                # The next event cannot be merged: flush the pending row.
                                end_time = action_time
                                start = dt.strptime(start_time, "%Y-%m-%d %H:%M:%S")
                                end = dt.strptime(end_time, "%Y-%m-%d %H:%M:%S")
                                diff = writeTime(curr, start, end)
                                # diff = f'In slideshow for {(end - start).total_seconds()} seconds' if (end - start).total_seconds() > 0 else None
                                if is_pe_exercise:
                                    writer.writerow([session_count, user_id, book_id, writeEvName(curr), "Attempted to solve PE", start_time, end_time, diff, exercise_name, num_event])
                                elif ev_name == 'window-focus':
                                    # if next_ev['name'] == 'window-blur':
                                    # if (next_time - now).total_seconds() > 3:
                                    # Time from this focus event to the next event is reported as reading time.
                                    writer.writerow([session_count, user_id, book_id, "Window open", writeDesc(curr), start_time, end_time, f'Reading time: {(next_time - now).total_seconds()} sec', exercise_name, num_event])
                                elif ev_name == 'window-blur' and next_ev['name'] == 'window-focus':
                                    # if (next_time - now).total_seconds() > 3:
                                    # Time between blur and the following focus is reported as away time.
                                    writer.writerow([session_count, user_id, book_id, "Window close", writeDesc(curr), start_time, end_time, f'Away time: {(next_time - now).total_seconds()} sec', exercise_name, num_event])
                                elif is_ff_exercise:
                                    writer.writerow([session_count, user_id, book_id, writeEvName(curr), writeDesc(curr), start_time, end_time, diff, exercise_name, num_event])
                                else:
                                    writer.writerow([session_count, user_id, book_id, writeEvName(curr), writeDesc(curr), start_time, end_time, diff, exercise_name, num_event])
                                start_time = 0
                                num_event = 1
                                is_pe_exercise = False
                                is_ff_exercise = False
                    else:
                        start_time = 0
                        session_count = 0
                        num_event = 1
            else:
                # Starts a new session for new student
                # For the last event of a session, we need to add one more row of event at the end of the session
                end_time = action_time
                start = dt.strptime(start_time, "%Y-%m-%d %H:%M:%S")
                end = dt.strptime(end_time, "%Y-%m-%d %H:%M:%S")
                diff = writeTime(curr, start, end)
                # diff = f'{(end - start).total_seconds()} seconds' if (end - start).total_seconds() > 0 else None
                # NOTE(review): next_ev belongs to a different user in this branch, so the
                # window-focus / window-blur comparisons below mix two users' events.
                if is_pe_exercise:
                    writer.writerow([session_count, user_id, book_id, writeEvName(curr), "Attempted to solve PE", start_time, end_time, diff, exercise_name, num_event])
                elif ev_name == 'window-focus' and next_ev['name'] == 'window-blur':
                    # if (next_time - now).total_seconds() > 3:
                    writer.writerow([session_count, user_id, book_id, "Window open", writeDesc(curr), start_time, end_time, f'Reading time: {(next_time - now).total_seconds()} sec', exercise_name, num_event])
                elif ev_name == 'window-blur' and next_ev['name'] == 'window-focus':
                    # if (next_time - now).total_seconds() > 3:
                    writer.writerow([session_count, user_id, book_id, "Window close", writeDesc(curr), start_time, end_time, f'Away time: {(next_time - now).total_seconds()} sec', exercise_name, num_event])
                elif is_ff_exercise:
                    writer.writerow([session_count, user_id, book_id, writeEvName(curr), writeDesc(curr), start_time, end_time, diff, exercise_name, num_event])
                else:
                    writer.writerow([session_count, user_id, book_id, writeEvName(curr), writeDesc(curr), start_time, end_time, diff, exercise_name, num_event])
                # Write a comprehensive information about a session
                start_time = 0
                # 0 makes the next iteration restart numbering at 1 for the new user.
                session_count = 0
                num_event = 1
                session_end_time = end
                # writer.writerow([f'Session lasted for {session_end_time - dt.strptime(session_start_time, "%Y-%m-%d %H:%M:%S")}'])
                # writer.writerow([f'All User Session Ended for User: {user_id}'])
                total_session_time = total_session_time + (session_end_time - dt.strptime(session_start_time, "%Y-%m-%d %H:%M:%S")).total_seconds()
                is_pe_exercise = False
                is_ff_exercise = False
            # Advance the trailing row for the next pair.
            curr = row
# Script driver: run the abstraction for one data set and report the
# session totals together with the wall-clock time the run took.
start_time = time.time()
abstract("cs5040_spring_2021")
print(total_session_count, total_session_time)
elapsed = time.time() - start_time
print("--- %s seconds ---" % (elapsed))
# SQL commands for pulling Fall 2020 CS4114 data
# SELECT oui.id, oui.user_id, oui.inst_book_id, oui.name, oui.description, oui.action_time, oui.inst_chapter_module_id, exercise.inst_section_id, exercise.inst_exercise_id, ex.short_name, ex.ex_type
# FROM opendsa.odsa_user_interactions oui
# LEFT JOIN opendsa.inst_book_section_exercises exercise ON oui.inst_book_section_exercise_id = exercise.id
# LEFT JOIN opendsa.inst_exercises ex ON exercise.inst_exercise_id = ex.id
# WHERE oui.inst_book_id = 852 Order by action_time ASC;
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment