comparison lisp/play/landmark.el @ 111206:56f4300fbd9f

* lisp/play/landmark.el: Adjust commenting convention. (lm-nil-score): Rename from nil-score. (Xscore, XXscore, XXXscore, XXXXscore, Oscore, OOscore, OOOscore) (OOOOscore): Move into a let in lm-score-trans-table. (lm-winning-threshold, lm-loosing-threshold): Use lm-score-trans-table.
author Stefan Monnier <monnier@iro.umontreal.ca>
date Wed, 27 Oct 2010 10:31:44 -0400
parents cc035ccb9275
children 029e4783cbae
comparison
equal deleted inserted replaced
111205:e6399f46aefa 111206:56f4300fbd9f
28 ;; You should have received a copy of the GNU General Public License 28 ;; You should have received a copy of the GNU General Public License
29 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. 29 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
30 30
31 31
32 ;;; Commentary: 32 ;;; Commentary:
33 ;;; Lm is a relatively non-participatory game in which a robot 33 ;; Lm is a relatively non-participatory game in which a robot
34 ;;; attempts to maneuver towards a tree at the center of the window 34 ;; attempts to maneuver towards a tree at the center of the window
35 ;;; based on unique olfactory cues from each of the 4 directions. If 35 ;; based on unique olfactory cues from each of the 4 directions. If
36 ;;; the smell of the tree increases, then the weights in the robot's 36 ;; the smell of the tree increases, then the weights in the robot's
37 ;;; brain are adjusted to encourage this odor-driven behavior in the 37 ;; brain are adjusted to encourage this odor-driven behavior in the
38 ;;; future. If the smell of the tree decreases, the robots weights are 38 ;; future. If the smell of the tree decreases, the robots weights are
39 ;;; adjusted to discourage a correct move. 39 ;; adjusted to discourage a correct move.
40 40
41 ;;; In laymen's terms, the search space is initially flat. The point 41 ;; In laymen's terms, the search space is initially flat. The point
42 ;;; of training is to "turn up the edges of the search space" so that 42 ;; of training is to "turn up the edges of the search space" so that
43 ;;; the robot rolls toward the center. 43 ;; the robot rolls toward the center.
44 44
45 ;;; Further, do not become alarmed if the robot appears to oscillate 45 ;; Further, do not become alarmed if the robot appears to oscillate
46 ;;; back and forth between two or a few positions. This simply means 46 ;; back and forth between two or a few positions. This simply means
47 ;;; it is currently caught in a local minimum and is doing its best to 47 ;; it is currently caught in a local minimum and is doing its best to
48 ;;; work its way out. 48 ;; work its way out.
49 49
50 ;;; The version of this program as described has a small problem. a 50 ;; The version of this program as described has a small problem. a
51 ;;; move in a net direction can produce gross credit assignment. for 51 ;; move in a net direction can produce gross credit assignment. for
52 ;;; example, if moving south will produce positive payoff, then, if in 52 ;; example, if moving south will produce positive payoff, then, if in
53 ;;; a single move, one moves east,west and south, then both east and 53 ;; a single move, one moves east,west and south, then both east and
54 ;;; west will be improved when they shouldn't 54 ;; west will be improved when they shouldn't
55 55
56 ;;; Many thanks to Yuri Pryadkin (yuri@rana.usc.edu) for this 56 ;; Many thanks to Yuri Pryadkin (yuri@rana.usc.edu) for this
57 ;;; concise problem description. 57 ;; concise problem description.
58 58
59 ;;;_* Require 59 ;;;_* Require
60 (eval-when-compile (require 'cl)) 60 (eval-when-compile (require 'cl))
61 61
62 ;;;_* From Gomoku 62 ;;;_* From Gomoku
301 301
302 ;; Here are the scores of the nine "non-polluted" configurations. Tuning 302 ;; Here are the scores of the nine "non-polluted" configurations. Tuning
303 ;; these values will change (hopefully improve) the strength of the program 303 ;; these values will change (hopefully improve) the strength of the program
304 ;; and may change its style (rather aggressive here). 304 ;; and may change its style (rather aggressive here).
305 305
306 (defconst nil-score 7 "Score of an empty qtuple.") 306 (defconst lm-nil-score 7 "Score of an empty qtuple.")
307 (defconst Xscore 15 "Score of a qtuple containing one X.")
308 (defconst XXscore 400 "Score of a qtuple containing two X's.")
309 (defconst XXXscore 1800 "Score of a qtuple containing three X's.")
310 (defconst XXXXscore 100000 "Score of a qtuple containing four X's.")
311 (defconst Oscore 35 "Score of a qtuple containing one O.")
312 (defconst OOscore 800 "Score of a qtuple containing two O's.")
313 (defconst OOOscore 15000 "Score of a qtuple containing three O's.")
314 (defconst OOOOscore 800000 "Score of a qtuple containing four O's.")
315
316 ;; These values are not just random: if, given the following situation:
317 ;;
318 ;; . . . . . . . O .
319 ;; . X X a . . . X .
320 ;; . . . X . . . X .
321 ;; . . . X . . . X .
322 ;; . . . . . . . b .
323 ;;
324 ;; you want Emacs to play in "a" and not in "b", then the parameters must
325 ;; satisfy the inequality:
326 ;;
327 ;; 6 * XXscore > XXXscore + XXscore
328 ;;
329 ;; because "a" mainly belongs to six "XX" qtuples (the others are less
330 ;; important) while "b" belongs to one "XXX" and one "XX" qtuples. Other
331 ;; conditions are required to obtain sensible moves, but the previous example
332 ;; should illustrate the point. If you manage to improve on these values,
333 ;; please send me a note. Thanks.
334
335
336 ;; As we chose values 0, 1 and 6 to denote empty, X and O squares, the
337 ;; contents of a qtuple are uniquely determined by the sum of its elements and
338 ;; we just have to set up a translation table.
339 307
340 (defconst lm-score-trans-table 308 (defconst lm-score-trans-table
341 (vector nil-score Xscore XXscore XXXscore XXXXscore 0 309 (let ((Xscore 15) ; Score of a qtuple containing one X.
342 Oscore 0 0 0 0 0 310 (XXscore 400) ; Score of a qtuple containing two X's.
343 OOscore 0 0 0 0 0 311 (XXXscore 1800) ; Score of a qtuple containing three X's.
344 OOOscore 0 0 0 0 0 312 (XXXXscore 100000) ; Score of a qtuple containing four X's.
345 OOOOscore 0 0 0 0 0 313 (Oscore 35) ; Score of a qtuple containing one O.
346 0) 314 (OOscore 800) ; Score of a qtuple containing two O's.
315 (OOOscore 15000) ; Score of a qtuple containing three O's.
316 (OOOOscore 800000)) ; Score of a qtuple containing four O's.
317
318 ;; These values are not just random: if, given the following situation:
319 ;;
320 ;; . . . . . . . O .
321 ;; . X X a . . . X .
322 ;; . . . X . . . X .
323 ;; . . . X . . . X .
324 ;; . . . . . . . b .
325 ;;
326 ;; you want Emacs to play in "a" and not in "b", then the parameters must
327 ;; satisfy the inequality:
328 ;;
329 ;; 6 * XXscore > XXXscore + XXscore
330 ;;
331 ;; because "a" mainly belongs to six "XX" qtuples (the others are less
332 ;; important) while "b" belongs to one "XXX" and one "XX" qtuples.
333 ;; Other conditions are required to obtain sensible moves, but the
334 ;; previous example should illustrate the point. If you manage to
335 ;; improve on these values, please send me a note. Thanks.
336
337
338 ;; As we chose values 0, 1 and 6 to denote empty, X and O squares,
339 ;; the contents of a qtuple are uniquely determined by the sum of
340 ;; its elements and we just have to set up a translation table.
341 (vector lm-nil-score Xscore XXscore XXXscore XXXXscore 0
342 Oscore 0 0 0 0 0
343 OOscore 0 0 0 0 0
344 OOOscore 0 0 0 0 0
345 OOOOscore 0 0 0 0 0
346 0))
347 "Vector associating qtuple contents to their score.") 347 "Vector associating qtuple contents to their score.")
348 348
349 349
350 ;; If you do not modify drastically the previous constants, the only way for a 350 ;; If you do not modify drastically the previous constants, the only way for a
351 ;; square to have a score higher than OOOOscore is to belong to a "OOOO" 351 ;; square to have a score higher than OOOOscore is to belong to a "OOOO"
352 ;; qtuple, thus to be a winning move. Similarly, the only way for a square to 352 ;; qtuple, thus to be a winning move. Similarly, the only way for a square to
353 ;; have a score between XXXXscore and OOOOscore is to belong to a "XXXX" 353 ;; have a score between XXXXscore and OOOOscore is to belong to a "XXXX"
354 ;; qtuple. We may use these considerations to detect when a given move is 354 ;; qtuple. We may use these considerations to detect when a given move is
355 ;; winning or loosing. 355 ;; winning or loosing.
356 356
357 (defconst lm-winning-threshold OOOOscore 357 (defconst lm-winning-threshold
358 (aref lm-score-trans-table (+ 6 6 6 6)) ;; OOOOscore
358 "Threshold score beyond which an Emacs move is winning.") 359 "Threshold score beyond which an Emacs move is winning.")
359 360
360 (defconst lm-loosing-threshold XXXXscore 361 (defconst lm-loosing-threshold
362 (aref lm-score-trans-table (+ 1 1 1 1)) ;; XXXXscore
361 "Threshold score beyond which a human move is winning.") 363 "Threshold score beyond which a human move is winning.")
362 364
363 365
364 (defun lm-strongest-square () 366 (defun lm-strongest-square ()
365 "Compute index of free square with highest score, or nil if none." 367 "Compute index of free square with highest score, or nil if none."
421 (= lm-board-width lm-saved-board-width) 423 (= lm-board-width lm-saved-board-width)
422 (= lm-board-height lm-saved-board-height)) 424 (= lm-board-height lm-saved-board-height))
423 (setq lm-score-table (copy-sequence lm-saved-score-table)) 425 (setq lm-score-table (copy-sequence lm-saved-score-table))
424 ;; No, compute it: 426 ;; No, compute it:
425 (setq lm-score-table 427 (setq lm-score-table
426 (make-vector lm-vector-length (* 20 nil-score))) 428 (make-vector lm-vector-length (* 20 lm-nil-score)))
427 (let (i j maxi maxj maxi2 maxj2) 429 (let (i j maxi maxj maxi2 maxj2)
428 (setq maxi (/ (1+ lm-board-width) 2) 430 (setq maxi (/ (1+ lm-board-width) 2)
429 maxj (/ (1+ lm-board-height) 2) 431 maxj (/ (1+ lm-board-height) 2)
430 maxi2 (min 4 maxi) 432 maxi2 (min 4 maxi)
431 maxj2 (min 4 maxj)) 433 maxj2 (min 4 maxj))