Mercurial > emacs
comparison lisp/play/landmark.el @ 111206:56f4300fbd9f
* lisp/play/landmark.el: Adjust commenting convention.
(lm-nil-score): Rename from nil-score.
(Xscore, XXscore, XXXscore, XXXXscore, Oscore, OOscore, OOOscore)
(OOOOscore): Move into a let in lm-score-trans-table.
(lm-winning-threshold, lm-loosing-threshold): Use lm-score-trans-table.
author | Stefan Monnier <monnier@iro.umontreal.ca> |
---|---|
date | Wed, 27 Oct 2010 10:31:44 -0400 |
parents | cc035ccb9275 |
children | 029e4783cbae |
comparison
equal
deleted
inserted
replaced
111205:e6399f46aefa | 111206:56f4300fbd9f |
---|---|
28 ;; You should have received a copy of the GNU General Public License | 28 ;; You should have received a copy of the GNU General Public License |
29 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. | 29 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. |
30 | 30 |
31 | 31 |
32 ;;; Commentary: | 32 ;;; Commentary: |
33 ;;; Lm is a relatively non-participatory game in which a robot | 33 ;; Lm is a relatively non-participatory game in which a robot |
34 ;;; attempts to maneuver towards a tree at the center of the window | 34 ;; attempts to maneuver towards a tree at the center of the window |
35 ;;; based on unique olfactory cues from each of the 4 directions. If | 35 ;; based on unique olfactory cues from each of the 4 directions. If |
36 ;;; the smell of the tree increases, then the weights in the robot's | 36 ;; the smell of the tree increases, then the weights in the robot's |
37 ;;; brain are adjusted to encourage this odor-driven behavior in the | 37 ;; brain are adjusted to encourage this odor-driven behavior in the |
38 ;;; future. If the smell of the tree decreases, the robots weights are | 38 ;; future. If the smell of the tree decreases, the robots weights are |
39 ;;; adjusted to discourage a correct move. | 39 ;; adjusted to discourage a correct move. |
40 | 40 |
41 ;;; In laymen's terms, the search space is initially flat. The point | 41 ;; In laymen's terms, the search space is initially flat. The point |
42 ;;; of training is to "turn up the edges of the search space" so that | 42 ;; of training is to "turn up the edges of the search space" so that |
43 ;;; the robot rolls toward the center. | 43 ;; the robot rolls toward the center. |
44 | 44 |
45 ;;; Further, do not become alarmed if the robot appears to oscillate | 45 ;; Further, do not become alarmed if the robot appears to oscillate |
46 ;;; back and forth between two or a few positions. This simply means | 46 ;; back and forth between two or a few positions. This simply means |
47 ;;; it is currently caught in a local minimum and is doing its best to | 47 ;; it is currently caught in a local minimum and is doing its best to |
48 ;;; work its way out. | 48 ;; work its way out. |
49 | 49 |
50 ;;; The version of this program as described has a small problem. a | 50 ;; The version of this program as described has a small problem. a |
51 ;;; move in a net direction can produce gross credit assignment. for | 51 ;; move in a net direction can produce gross credit assignment. for |
52 ;;; example, if moving south will produce positive payoff, then, if in | 52 ;; example, if moving south will produce positive payoff, then, if in |
53 ;;; a single move, one moves east,west and south, then both east and | 53 ;; a single move, one moves east,west and south, then both east and |
54 ;;; west will be improved when they shouldn't | 54 ;; west will be improved when they shouldn't |
55 | 55 |
56 ;;; Many thanks to Yuri Pryadkin (yuri@rana.usc.edu) for this | 56 ;; Many thanks to Yuri Pryadkin (yuri@rana.usc.edu) for this |
57 ;;; concise problem description. | 57 ;; concise problem description. |
58 | 58 |
59 ;;;_* Require | 59 ;;;_* Require |
60 (eval-when-compile (require 'cl)) | 60 (eval-when-compile (require 'cl)) |
61 | 61 |
62 ;;;_* From Gomoku | 62 ;;;_* From Gomoku |
301 | 301 |
302 ;; Here are the scores of the nine "non-polluted" configurations. Tuning | 302 ;; Here are the scores of the nine "non-polluted" configurations. Tuning |
303 ;; these values will change (hopefully improve) the strength of the program | 303 ;; these values will change (hopefully improve) the strength of the program |
304 ;; and may change its style (rather aggressive here). | 304 ;; and may change its style (rather aggressive here). |
305 | 305 |
306 (defconst nil-score 7 "Score of an empty qtuple.") | 306 (defconst lm-nil-score 7 "Score of an empty qtuple.") |
307 (defconst Xscore 15 "Score of a qtuple containing one X.") | |
308 (defconst XXscore 400 "Score of a qtuple containing two X's.") | |
309 (defconst XXXscore 1800 "Score of a qtuple containing three X's.") | |
310 (defconst XXXXscore 100000 "Score of a qtuple containing four X's.") | |
311 (defconst Oscore 35 "Score of a qtuple containing one O.") | |
312 (defconst OOscore 800 "Score of a qtuple containing two O's.") | |
313 (defconst OOOscore 15000 "Score of a qtuple containing three O's.") | |
314 (defconst OOOOscore 800000 "Score of a qtuple containing four O's.") | |
315 | |
316 ;; These values are not just random: if, given the following situation: | |
317 ;; | |
318 ;; . . . . . . . O . | |
319 ;; . X X a . . . X . | |
320 ;; . . . X . . . X . | |
321 ;; . . . X . . . X . | |
322 ;; . . . . . . . b . | |
323 ;; | |
324 ;; you want Emacs to play in "a" and not in "b", then the parameters must | |
325 ;; satisfy the inequality: | |
326 ;; | |
327 ;; 6 * XXscore > XXXscore + XXscore | |
328 ;; | |
329 ;; because "a" mainly belongs to six "XX" qtuples (the others are less | |
330 ;; important) while "b" belongs to one "XXX" and one "XX" qtuples. Other | |
331 ;; conditions are required to obtain sensible moves, but the previous example | |
332 ;; should illustrate the point. If you manage to improve on these values, | |
333 ;; please send me a note. Thanks. | |
334 | |
335 | |
336 ;; As we chose values 0, 1 and 6 to denote empty, X and O squares, the | |
337 ;; contents of a qtuple are uniquely determined by the sum of its elements and | |
338 ;; we just have to set up a translation table. | |
339 | 307 |
340 (defconst lm-score-trans-table | 308 (defconst lm-score-trans-table |
341 (vector nil-score Xscore XXscore XXXscore XXXXscore 0 | 309 (let ((Xscore 15) ; Score of a qtuple containing one X. |
342 Oscore 0 0 0 0 0 | 310 (XXscore 400) ; Score of a qtuple containing two X's. |
343 OOscore 0 0 0 0 0 | 311 (XXXscore 1800) ; Score of a qtuple containing three X's. |
344 OOOscore 0 0 0 0 0 | 312 (XXXXscore 100000) ; Score of a qtuple containing four X's. |
345 OOOOscore 0 0 0 0 0 | 313 (Oscore 35) ; Score of a qtuple containing one O. |
346 0) | 314 (OOscore 800) ; Score of a qtuple containing two O's. |
315 (OOOscore 15000) ; Score of a qtuple containing three O's. | |
316 (OOOOscore 800000)) ; Score of a qtuple containing four O's. | |
317 | |
318 ;; These values are not just random: if, given the following situation: | |
319 ;; | |
320 ;; . . . . . . . O . | |
321 ;; . X X a . . . X . | |
322 ;; . . . X . . . X . | |
323 ;; . . . X . . . X . | |
324 ;; . . . . . . . b . | |
325 ;; | |
326 ;; you want Emacs to play in "a" and not in "b", then the parameters must | |
327 ;; satisfy the inequality: | |
328 ;; | |
329 ;; 6 * XXscore > XXXscore + XXscore | |
330 ;; | |
331 ;; because "a" mainly belongs to six "XX" qtuples (the others are less | |
332 ;; important) while "b" belongs to one "XXX" and one "XX" qtuples. | |
333 ;; Other conditions are required to obtain sensible moves, but the | |
334 ;; previous example should illustrate the point. If you manage to | |
335 ;; improve on these values, please send me a note. Thanks. | |
336 | |
337 | |
338 ;; As we chose values 0, 1 and 6 to denote empty, X and O squares, | |
339 ;; the contents of a qtuple are uniquely determined by the sum of | |
340 ;; its elements and we just have to set up a translation table. | |
341 (vector lm-nil-score Xscore XXscore XXXscore XXXXscore 0 | |
342 Oscore 0 0 0 0 0 | |
343 OOscore 0 0 0 0 0 | |
344 OOOscore 0 0 0 0 0 | |
345 OOOOscore 0 0 0 0 0 | |
346 0)) | |
347 "Vector associating qtuple contents to their score.") | 347 "Vector associating qtuple contents to their score.") |
348 | 348 |
349 | 349 |
350 ;; If you do not modify drastically the previous constants, the only way for a | 350 ;; If you do not modify drastically the previous constants, the only way for a |
351 ;; square to have a score higher than OOOOscore is to belong to a "OOOO" | 351 ;; square to have a score higher than OOOOscore is to belong to a "OOOO" |
352 ;; qtuple, thus to be a winning move. Similarly, the only way for a square to | 352 ;; qtuple, thus to be a winning move. Similarly, the only way for a square to |
353 ;; have a score between XXXXscore and OOOOscore is to belong to a "XXXX" | 353 ;; have a score between XXXXscore and OOOOscore is to belong to a "XXXX" |
354 ;; qtuple. We may use these considerations to detect when a given move is | 354 ;; qtuple. We may use these considerations to detect when a given move is |
355 ;; winning or loosing. | 355 ;; winning or loosing. |
356 | 356 |
357 (defconst lm-winning-threshold OOOOscore | 357 (defconst lm-winning-threshold |
358 (aref lm-score-trans-table (+ 6 6 6 6)) ;; OOOOscore | |
358 "Threshold score beyond which an Emacs move is winning.") | 359 "Threshold score beyond which an Emacs move is winning.") |
359 | 360 |
360 (defconst lm-loosing-threshold XXXXscore | 361 (defconst lm-loosing-threshold |
362 (aref lm-score-trans-table (+ 1 1 1 1)) ;; XXXXscore | |
361 "Threshold score beyond which a human move is winning.") | 363 "Threshold score beyond which a human move is winning.") |
362 | 364 |
363 | 365 |
364 (defun lm-strongest-square () | 366 (defun lm-strongest-square () |
365 "Compute index of free square with highest score, or nil if none." | 367 "Compute index of free square with highest score, or nil if none." |
421 (= lm-board-width lm-saved-board-width) | 423 (= lm-board-width lm-saved-board-width) |
422 (= lm-board-height lm-saved-board-height)) | 424 (= lm-board-height lm-saved-board-height)) |
423 (setq lm-score-table (copy-sequence lm-saved-score-table)) | 425 (setq lm-score-table (copy-sequence lm-saved-score-table)) |
424 ;; No, compute it: | 426 ;; No, compute it: |
425 (setq lm-score-table | 427 (setq lm-score-table |
426 (make-vector lm-vector-length (* 20 nil-score))) | 428 (make-vector lm-vector-length (* 20 lm-nil-score))) |
427 (let (i j maxi maxj maxi2 maxj2) | 429 (let (i j maxi maxj maxi2 maxj2) |
428 (setq maxi (/ (1+ lm-board-width) 2) | 430 (setq maxi (/ (1+ lm-board-width) 2) |
429 maxj (/ (1+ lm-board-height) 2) | 431 maxj (/ (1+ lm-board-height) 2) |
430 maxi2 (min 4 maxi) | 432 maxi2 (min 4 maxi) |
431 maxj2 (min 4 maxj)) | 433 maxj2 (min 4 maxj)) |