This repository has been archived by the owner on Jul 3, 2020. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
model.lisp
363 lines (325 loc) · 9.98 KB
/
model.lisp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
;;; Load files for the program
;; program.lisp is resolved relative to this file while it is being LOADed.
;; When these forms are evaluated interactively instead, *load-truename* is
;; NIL, so the path falls back to *default-pathname-defaults*; in that case
;; the Lisp environment's default directory must be the project directory.
(load (merge-pathnames "program.lisp"
                       (or *load-truename* *default-pathname-defaults*)))
;; Start from a clean slate before the model is defined
(clear-all)
(define-model jameson
;; Model parameters
;; The first sgp call sets fixed values; the sgp-fct call sets :ul from the
;; *learning* variable, which is defined outside this file section (the same
;; variable gates the reward setup at the end of the model).
;; NOTE(review): parameter glosses below follow the ACT-R reference manual
;; naming and should be confirmed against the installed ACT-R version:
;;   :v / :trace-detail / :show-focus / :ncnar - tracing and display options
;;   :esc :bll :er :lf :ans :mp :rt            - subsymbolic / declarative retrieval settings
;;   :ult :egs :alpha :ul                      - utility learning settings
(sgp :v nil :trace-detail low :show-focus t :ncnar nil
:esc t :bll .5 :er t :lf 0.0
:ans .15 :mp 20.0 :rt -10.0
:ult t :egs 3.0 :alpha .2)
(sgp-fct (list :ul *learning*))
;; Chunk types definitions
; A pair of coordinates (filled from screen-x / screen-y by attend-object)
(chunk-type position
            pos-x
            pos-y)
; A remembered situation (relative projectile position and direction)
; together with the action that was taken in it
(chunk-type estimate
            projectile-position
            projectile-direction
            action)
; The goal chunk: Jameson's position, the last three projectile
; positions, bookkeeping flags for what has been encoded, the
; interpreted situation, the chosen action, its result and the
; current processing state
(chunk-type environment
            jameson-position
            oldest-position
            middle-position
            newest-position
            jameson-encoded
            projectile-encoded
            projectile-position
            projectile-direction
            action
            result
            state)
;; Declarative memory initialization
(add-dm
   ; Initial goal chunk, placed in the goal buffer by goal-focus
   (goal isa environment state start)
   (start)
   ; Environment perception states
   (attending)
   (encoding-jameson)
   (encoding-projectile)
   (done)
   (take-action)
   ; Environment interpretation states
   (estimating)
   ; Learning states
   (retrieving)
   (results))
;; Environment perception productions
; While the goal is in the start state, picks up the object currently
; in the visual-location buffer, moves Jameson's visual attention to
; it and stores its screen coordinates in a new position chunk in the
; imaginal buffer. Both the visual and the imaginal module must be free.
(P attend-object
   =goal>
      isa         environment
      state       start
   =visual-location>
      screen-x    =col
      screen-y    =row
   ?visual>
      state       free
   ?imaginal>
      state       free
 ==>
   =goal>
      state       attending
   +visual>
      cmd         move-attention
      screen-pos  =visual-location
   +imaginal>
      isa         position
      pos-x       =col
      pos-y       =row)
; The attended object has the visual value "J", i.e. it is Jameson
; himself: switch the goal to encoding-jameson and request a
; visual location that has not been attended yet.
(P encode-jameson
   =goal>
      isa         environment
      state       attending
   =visual>
      value       "J"
 ==>
   =goal>
      state       encoding-jameson
   +visual-location>
      :attended   nil)
; The attended object has any visual value other than "J", so it is
; treated as a projectile: switch the goal to encoding-projectile and
; request a visual location that has not been attended yet.
(P encode-projectile
   =goal>
      isa         environment
      state       attending
   =visual>
    - value       "J"
 ==>
   =goal>
      state       encoding-projectile
   +visual-location>
      :attended   nil)
; Records the position chunk held in the imaginal buffer as Jameson's
; own position, flags Jameson as encoded and returns the goal to the
; start state.
(P add-position-jameson
   =goal>
      isa               environment
      state             encoding-jameson
   ?imaginal>
      buffer            full
   =imaginal>
 ==>
   =goal>
      jameson-encoded   done
      jameson-position  =imaginal
      state             start)
; Appends the position in the imaginal buffer to the projectile's
; trajectory when at least the two most recent positions (middle and
; newest) are already known: every stored position shifts one step
; towards "oldest" and the new one becomes "newest".
(P add-position-full-trajectory
   =goal>
      isa                 environment
      state               encoding-projectile
      newest-position     =latest
      middle-position     =previous
   ?imaginal>
      buffer              full
   =imaginal>
 ==>
   =goal>
      oldest-position     =previous
      middle-position     =latest
      newest-position     =imaginal
      projectile-encoded  done
      state               start)
; Appends the position in the imaginal buffer to the projectile's
; trajectory when only the previous timestep is known (newest is set,
; middle is still empty): the known position moves to "middle" and
; the new one becomes "newest".
(P add-position-partial-trajectory
   =goal>
      isa                 environment
      state               encoding-projectile
      newest-position     =latest
      middle-position     nil
   ?imaginal>
      buffer              full
   =imaginal>
 ==>
   =goal>
      middle-position     =latest
      newest-position     =imaginal
      projectile-encoded  done
      state               start)
; Starts the projectile's trajectory when no earlier position is
; known (newest is still empty): the position in the imaginal buffer
; becomes "newest".
(P add-position-no-trajectory
   =goal>
      isa                 environment
      state               encoding-projectile
      newest-position     nil
   ?imaginal>
      buffer              full
   =imaginal>
 ==>
   =goal>
      newest-position     =imaginal
      projectile-encoded  done
      state               start)
; Jameson and the projectile have both been encoded, but the
; trajectory is still incomplete (oldest-position is empty), so there
; is not enough information to guess the projectile's behavior.
; The model does not move on to estimation: it presses "s", resets
; both encoded flags, clears the imaginal buffer and stays in the
; start state.
(P cant-take-action
   =goal>
      isa                 environment
      state               start
      oldest-position     nil
      jameson-encoded     done
      projectile-encoded  done
   ?manual>
      state               free
 ==>
   =goal>
      jameson-encoded     nil
      projectile-encoded  nil
   +manual>
      cmd                 press-key
      key                 "s"
   -imaginal>)
; Jameson and the projectile have both been encoded and the
; trajectory is complete (oldest-position is filled), so there is
; enough information to guess the projectile's behavior: reset both
; encoded flags, clear the imaginal buffer and move on to estimating.
(P can-take-action
   =goal>
      isa                 environment
      state               start
    - oldest-position     nil
      jameson-encoded     done
      projectile-encoded  done
 ==>
   =goal>
      state               estimating
      jameson-encoded     nil
      projectile-encoded  nil
   -imaginal>)
;; Environment interpretation productions
;; NOTE: This is the section where the estimation made by the
;; productions should take into account more information
;; about the environment if we want to learn more
;; efficiently or support more use cases (e.g. parabolic
;; trajectories)
; Interprets the environment and extracts the information that
; should guide the decision about where to move next:
;   =position  - the projectile's newest pos-y compared with Jameson's
;                pos-y: "equal", "lower" (greater pos-y) or "higher"
;                (smaller pos-y)
;   =direction - the projectile's newest pos-y compared with its
;                middle pos-y: "level", "down" (greater) or "up"
;                (smaller)
; Both values are written to the goal and used as the cues of a
; retrieval request for a matching estimate chunk.
;
; Each cond ends with a catch-all clause. Without it, two values that
; are numerically equal but not EQUAL (e.g. 5 and 5.0) satisfy none of
; the three tests, the cond returns NIL, and per the ACT-R reference
; manual a left-hand-side !bind! that evaluates to NIL keeps the
; production from matching, leaving the model in the estimating state.
(P interpret-environment
   =goal>
      isa               environment
      state             estimating
      jameson-position  =j
      middle-position   =mp
      newest-position   =np
   !bind! =np-y (chunk-slot-value-fct =np 'pos-y)
   !bind! =mp-y (chunk-slot-value-fct =mp 'pos-y)
   !bind! =j-y (chunk-slot-value-fct =j 'pos-y)
   !bind! =position (cond
                      ((equal =np-y =j-y) "equal")
                      ((> =np-y =j-y) "lower")
                      ((< =np-y =j-y) "higher")
                      ; numerically equal, but of different numeric types
                      (t "equal"))
   !bind! =direction (cond
                       ((equal =np-y =mp-y) "level")
                       ((> =np-y =mp-y) "down")
                       ((< =np-y =mp-y) "up")
                       ; numerically equal, but of different numeric types
                       (t "level"))
 ==>
   =goal>
      state                 retrieving
      projectile-position   =position
      projectile-direction  =direction
   +retrieval>
      isa                   estimate
      projectile-position   =position
      projectile-direction  =direction)
;; New environment heuristics productions
; Fallback for an unknown environment (the retrieval of an estimate
; failed): decide not to move. Records "s" as the action taken,
; presses the "s" key and returns the goal to the start state.
; Shares its conditions with cant-remember-up and cant-remember-down.
(P cant-remember-stay
   =goal>
      isa      environment
      state    retrieving
   ?retrieval>
      buffer   failure
   ?manual>
      state    free
 ==>
   =goal>
      action   "s"
      state    start
   +manual>
      cmd      press-key
      key      "s")
; Fallback for an unknown environment (the retrieval of an estimate
; failed): decide to move up. Records "u" as the action taken,
; presses the "u" key and returns the goal to the start state.
; Shares its conditions with cant-remember-stay and cant-remember-down.
(P cant-remember-up
   =goal>
      isa      environment
      state    retrieving
   ?retrieval>
      buffer   failure
   ?manual>
      state    free
 ==>
   =goal>
      action   "u"
      state    start
   +manual>
      cmd      press-key
      key      "u")
; Fallback for an unknown environment (the retrieval of an estimate
; failed): decide to move down. Records "d" as the action taken,
; presses the "d" key and returns the goal to the start state.
; Shares its conditions with cant-remember-stay and cant-remember-up.
(P cant-remember-down
   =goal>
      isa      environment
      state    retrieving
   ?retrieval>
      buffer   failure
   ?manual>
      state    free
 ==>
   =goal>
      action   "d"
      state    start
   +manual>
      cmd      press-key
      key      "d")
;; Chunk retrieval learning productions
; The environment is known: an estimate chunk was retrieved. Repeats
; the action stored in that chunk by recording it in the goal and
; pressing the corresponding key, then returns to the start state.
; NOTE(review): the bare @retrieval> presumably empties the retrieval
; buffer without sending the chunk back to declarative memory, as the
; comment on reinitialize-goal describes for @goal> - confirm.
(P remember-environment
   =goal>
      isa      environment
      state    retrieving
   =retrieval>
      isa      estimate
      action   =remembered
   ?manual>
      state    free
 ==>
   =goal>
      action   =remembered
      state    start
   +manual>
      cmd      press-key
      key      =remembered
   @retrieval>)
; Remembers the action taken if it led to not being hit: builds an
; estimate chunk in the imaginal buffer from the situation and action
; stored in the goal, and empties the goal buffer with @goal>.
(P results-not-hit
   =goal>
      isa                   environment
      state                 results
      result                "not-hit"
      projectile-position   =where
      projectile-direction  =heading
      action                =move
   ?imaginal>
      state                 free
 ==>
   @goal>
   +imaginal>
      isa                   estimate
      projectile-position   =where
      projectile-direction  =heading
      action                =move)
; Forgets the action taken if it led to being hit: no estimate chunk
; is built, the goal buffer is simply emptied with @goal>.
(P results-hit
   =goal>
      isa     environment
      state   results
      result  "hit"
 ==>
   @goal>)
; Puts a fresh environment goal in the start state into the goal
; buffer whenever that buffer is empty, i.e. after it was cleared
; explicitly to avoid going into declarative memory.
(P reinitialize-goal
   ?goal>
      buffer  empty
 ==>
   +goal>
      isa     environment
      state   start)
;; Place the initial goal chunk (defined in add-dm) in the goal buffer
(goal-focus goal)
;; Initialize utilities for each action production
;; All three fallback productions start with the same utility (10)
(spp cant-remember-stay :u 10)
(spp cant-remember-up :u 10)
(spp cant-remember-down :u 10)
;; Trigger the reward at the end of a trial
;; Rewards are attached only when *learning* is true: +10 when
;; results-not-hit fires, -10 when results-hit fires.
;; The final closing parenthesis below also ends the define-model form.
(when *learning*
(spp results-not-hit :reward 10)
(spp results-hit :reward -10)))