• R/O
  • SSH
  • HTTPS

igo: コミット


コミットメタ情報

リビジョン67 (tree)
日時2010-03-22 20:22:07
作者phjgt

ログメッセージ

cl-igo: リストではなく、配列キューを使用したバージョン。満足のいく結果ではなかったが一応保存

変更サマリ

差分

--- branches/cl-igo-queue/tagger.lisp (nonexistent)
+++ branches/cl-igo-queue/tagger.lisp (revision 67)
@@ -0,0 +1,100 @@
1+(in-package :igo)
2+
3+;;;;;;;;;;;
4+;;; declaim
5+(declaim (inline coerce-to-simple-string set-mincost-node)
6+ #.*optimize-fastest*)
7+
8+;;;;;;;;;;
9+;;; struct
10+(defstruct tagger
11+ (wdc nil :type dic:word-dic)
12+ (unk nil :type unk:unknown)
13+ (mtx nil :type mtx:matrix))
14+
15+;;;;;;;;;;;;
16+;;; constant
17+(eval-when (:compile-toplevel :load-toplevel :execute)
18+ (igo::defconst-once-only +BOS-NODES+
19+ (let ((que (queue:make-queue)))
20+ (queue:push (vn:make-bos/eos) que)
21+ que)))
22+
23+;;;;;;;;;;;;;;;;;;;;;
24+;;; internal function
25+(defmacro nconcf (lst1 lst2)
26+ `(setf ,lst1 (nconc ,lst1 ,lst2)))
27+
28+(defun set-mincost-node (vn prevs mtx wdc &aux (left-id (vn:left-id vn)))
29+ (flet ((calc-cost (prev)
30+ (+ (vn:cost prev) (mtx:link-cost (vn:right-id prev) left-id mtx))))
31+ (declare (inline calc-cost))
32+
33+ (let ((fst (queue:front prevs)))
34+ (setf (vn:prev vn) fst
35+ (vn:cost vn) (calc-cost fst)))
36+
37+ (queue:each (p prevs 1)
38+ (let ((cost (calc-cost p)))
39+ (when (< cost (vn:cost vn))
40+ (setf (vn:prev vn) p
41+ (vn:cost vn) cost))))
42+
43+ (incf (vn:cost vn) (dic:word-cost (vn:word-id vn) wdc))
44+ vn))
45+
46+(defun parse-impl (tagger cs len)
47+ (declare (fixnum len))
48+ (let ((nodes (make-sequence 'simple-vector (1+ len)))
49+ (wdc (tagger-wdc tagger))
50+ (unk (tagger-unk tagger))
51+ (mtx (tagger-mtx tagger))
52+ (rlt (queue:make-queue)))
53+ (setf (aref nodes 0) +BOS-NODES+)
54+ (loop FOR i FROM 1 TO len DO
55+ (setf (aref nodes i) (queue:make-queue)))
56+
57+ (loop FOR i FROM 0 BELOW len
58+ FOR prevs = (aref nodes i) DO
59+ (setf (code-stream:position cs) i)
60+ (unless (queue:empty? prevs)
61+ (queue:clear rlt)
62+ (queue:each (vn (unk:search cs unk wdc (dic:search cs rlt wdc)))
63+ (if (vn:space? vn)
64+ (queue:push-all prevs (aref nodes (vn:end vn)))
65+ (queue:push (set-mincost-node vn prevs mtx wdc) (aref nodes (vn:end vn)))))))
66+ (vn:prev (set-mincost-node (vn:make-bos/eos) (aref nodes len) mtx wdc))))
67+
68+(defun coerce-to-simple-string (s)
69+ (declare (string s))
70+ (the simple-string
71+ (if (simple-string-p s)
72+ s
73+ (copy-seq s))))
74+
75+(defmacro parse-then-map-result ((viterbi-node text tagger) &body body)
76+ (let ((result (gensym)))
77+ `(let ((,text (coerce-to-simple-string ,text))
78+ (,result '()))
79+ (do ((,viterbi-node (parse-impl ,tagger (code-stream:make ,text 0) (length ,text))
80+ (vn:prev ,viterbi-node)))
81+ ((null (vn:prev ,viterbi-node)) ,result)
82+ (push (progn ,@body) ,result)))))
83+
84+;;;;;;;;;;;;;;;;;;;;;
85+;;; external function
86+(defun load-tagger (data-dir &optional (feature-parser #'identity))
87+ (prog1 (make-tagger :wdc (dic:load data-dir feature-parser)
88+ :unk (unk:load data-dir)
89+ :mtx (mtx:load data-dir))
90+ #+SBCL (sb-ext:gc :full t)))
91+
92+(defun parse (tagger text &aux (wdc (tagger-wdc tagger)))
93+ (parse-then-map-result (vn text tagger)
94+ (list (subseq text (vn:start vn) (vn:end vn))
95+ (dic:word-data (vn:word-id vn) wdc)
96+ (vn:start vn))))
97+
98+(defun wakati (tagger text)
99+ (parse-then-map-result (vn text tagger)
100+ (subseq text (vn:start vn) (vn:end vn))))
\ No newline at end of file
--- branches/cl-igo-queue/util.lisp (nonexistent)
+++ branches/cl-igo-queue/util.lisp (revision 67)
@@ -0,0 +1,23 @@
1+(in-package :igo)
2+
3+(defmacro delete-package-nickname (package)
4+ (declare (ignore package)))
5+#|
6+ `(eval-when (:compile-toplevel :load-toplevel :execute)
7+ (rename-package ,package ,package)))
8+|#
9+
10+(defmacro defconst-once-only (name value &optional documentation)
11+ `(unless (boundp ',name)
12+ (defconstant ,name ,value ,documentation)))
13+
14+(defun split (delim seq &aux (len (length delim)))
15+ (declare #.*optimize-fastest*
16+ (simple-string delim seq))
17+ (when (zerop len)
18+ (return-from split (list seq)))
19+
20+ (loop FOR beg = 0 THEN (the fixnum (+ end len))
21+ FOR end = (search delim seq :start2 beg)
22+ COLLECT (subseq seq beg end)
23+ WHILE end))
\ No newline at end of file
--- branches/cl-igo-queue/type.lisp (nonexistent)
+++ branches/cl-igo-queue/type.lisp (revision 67)
@@ -0,0 +1,15 @@
1+(defpackage igo.type
2+ (:use :common-lisp)
3+ (:export array-index
4+ character-code
5+ utf16-code
6+ negative-fixnum
7+ n-byte))
8+(in-package :igo.type)
9+
10+(deftype array-index () `(integer 0 ,array-total-size-limit))
11+(deftype character-code () `(integer 0 ,char-code-limit))
12+(deftype utf16-code () `(integer 0 #xFFFF))
13+(deftype negative-fixnum () `(integer ,most-negative-fixnum -1))
14+(deftype n-byte (byte-size signed?)
15+ `(,(if signed? 'signed-byte 'unsigned-byte) ,(* byte-size 8)))
\ No newline at end of file
--- branches/cl-igo-queue/queue.lisp (nonexistent)
+++ branches/cl-igo-queue/queue.lisp (revision 67)
@@ -0,0 +1,68 @@
1+(defpackage igo.queue
2+ (:use :common-lisp :igo.type)
3+ (:nicknames :queue)
4+ (:shadow push)
5+ (:export make-queue
6+ push
7+ push-all
8+ clear
9+ front
10+ empty?
11+ each))
12+(in-package :igo.queue)
13+
14+;;;;;;;;;;;
15+;;; declaim
16+(declaim #.igo::*optimize-fastest*
17+ (inline resize push push-all clear front empty? make-queue))
18+
19+;;;;;;;;;;
20+;;; struct
21+(defstruct (queue (:type vector)
22+ (:constructor make-queue (&aux (buf (make-array 16))))
23+ (:conc-name ""))
24+ (buf #() :type simple-vector)
25+ (pos 0 :type array-index))
26+
27+;;;;;;;;;;;;;;;;;;;;;
28+;;; internal function
29+(defun resize (queue new-size)
30+ (setf (buf queue) (adjust-array (buf queue) new-size)))
31+
32+;;;;;;;;;;;;;;;;;;;;;
33+;;; external function
34+(defun push (elem queue &aux (buf (buf queue)) (pos (pos queue)))
35+ (declare #.igo::*optimize-fastest*)
36+ (when (= pos (length buf))
37+ (setf buf (resize queue (* pos 2))))
38+ (setf (aref buf pos) elem)
39+ (incf (pos queue)))
40+
41+(defun push-all (que1 que2)
42+ (let ((new-pos (the array-index (+ (pos que1) (pos que2))))
43+ (buf1 (buf que1))
44+ (buf2 (buf que2)))
45+ (declare (simple-vector buf1 buf2))
46+ (when (>= new-pos (length buf2))
47+ (setf buf2 (resize que2 (the array-index (round (* new-pos 1.5))))))
48+ (setf (subseq buf2 (pos que2) new-pos) buf1
49+ (pos que2) new-pos)))
50+
51+(defmacro each ((var queue &optional (start 0)) &body body)
52+ (let ((i (gensym))
53+ (buf (gensym))
54+ (que (gensym)))
55+ `(let* ((,que ,queue)
56+ (,buf (buf ,que)))
57+ (loop FOR ,i FROM ,start BELOW (pos ,que)
58+ FOR ,var = (aref ,buf ,i) DO
59+ ,@body))))
60+
61+(defun clear (queue)
62+ (setf (pos queue) 0))
63+
64+(defun front (queue)
65+ (aref (buf queue) 0))
66+
67+(defun empty? (queue)
68+ (zerop (pos queue)))
\ No newline at end of file
--- branches/cl-igo-queue/viterbi-node.lisp (nonexistent)
+++ branches/cl-igo-queue/viterbi-node.lisp (revision 67)
@@ -0,0 +1,36 @@
1+(defpackage igo.viterbi-node
2+ (:use :common-lisp)
3+ (:nicknames :vn)
4+ (:export make
5+ make-bos/eos
6+ cost
7+ prev
8+ word-id
9+ left-id
10+ right-id
11+ start
12+ end
13+ space?))
14+(in-package :igo.viterbi-node)
15+
16+;;;;;;;;;;;
17+;;; declaim
18+(declaim (inline new-bos/eos))
19+
20+;;;;;;;;;;
21+;;; struct
22+(defstruct (viterbi-node (:constructor make (word-id start end left-id right-id space?))
23+ (:conc-name "")
24+ (:type vector))
25+ (cost 0 :type fixnum)
26+ (prev nil :type t)
27+ (left-id 0 :type fixnum)
28+ (right-id 0 :type fixnum)
29+ (word-id 0 :type fixnum)
30+ (start 0 :type fixnum)
31+ (end 0 :type fixnum)
32+ (space? nil :type boolean))
33+
34+;;;;;;;;;;;;;;;;;;;;;
35+;;; external function
36+(defun make-bos/eos () (make 0 0 0 0 0 nil))
\ No newline at end of file
--- branches/cl-igo-queue/char-category.lisp (nonexistent)
+++ branches/cl-igo-queue/char-category.lisp (revision 67)
@@ -0,0 +1,58 @@
1+(defpackage igo.char-category
2+ (:use :common-lisp)
3+ (:shadow load)
4+ (:export load
5+ category
6+ category-set
7+ compatible?
8+ category-trie-id
9+ category-length
10+ category-invoke?
11+ category-group?))
12+(in-package :igo.char-category)
13+
14+;;;;;;;;;;;
15+;;; declaim
16+(declaim (inline category compatible?))
17+
18+;;;;;;;;;;
19+;;; struct
20+(defstruct category
21+ (trie-id 0 :type fixnum)
22+ (length 0 :type fixnum)
23+ (invoke? nil :type boolean)
24+ (group? nil :type boolean))
25+
26+(defstruct (category-set (:conc-name ""))
27+ (categorys #() :type (simple-array category))
28+ (char->id #() :type (simple-array (signed-byte 32)))
29+ (eql-masks #() :type (simple-array (signed-byte 32))))
30+
31+;;;;;;;;;;;;;;;;;;;;;
32+;;; internal function
33+(defun load-categorys (root-dir)
34+ (vbs:with-input-file (in (merge-pathnames "char.category" root-dir))
35+ (let ((data (vbs:read-sequence in 4 (/ (vbs:file-size in) 4))))
36+ (coerce
37+ (loop FOR i FROM 0 BELOW (length data) BY 4 COLLECT
38+ (make-category :trie-id (aref data (+ i 0))
39+ :length (aref data (+ i 1))
40+ :invoke? (= 1 (aref data (+ i 2)))
41+ :group? (= 1 (aref data (+ i 3)))))
42+ 'vector))))
43+
44+;;;;;;;;;;;;;;;;;;;;;
45+;;; external-function
46+(defun load (root-dir)
47+ (vbs:with-input-file (in (merge-pathnames "code2category" root-dir))
48+ (make-category-set
49+ :categorys (load-categorys root-dir)
50+ :char->id (vbs:read-sequence in 4 (/ (vbs:file-size in) 4 2))
51+ :eql-masks (vbs:read-sequence in 4 (/ (vbs:file-size in) 4 2)))))
52+
53+(defun category (code cset)
54+ (aref (categorys cset) (aref (char->id cset) code)))
55+
56+(defun compatible? (code1 code2 cset)
57+ (let ((eqls (eql-masks cset)))
58+ (logtest (aref eqls code1) (aref eqls code2))))
\ No newline at end of file
--- branches/cl-igo-queue/package.lisp (nonexistent)
+++ branches/cl-igo-queue/package.lisp (revision 67)
@@ -0,0 +1,22 @@
1+(defpackage igo
2+ (:use :common-lisp)
3+ (:export *ipadic-feature-parser*
4+ load-tagger
5+ parse
6+ wakati))
7+(in-package :igo)
8+
9+(eval-when (:compile-toplevel :load-toplevel)
10+ (defvar *optimize-fastest* '(optimize (speed 3) (debug 0) (safety 0) (compilation-speed 0)))
11+ (defvar *optimize-default* '(optimize)))
12+
13+(defvar *ipadic-feature-parser*
14+ (lambda (feature)
15+ (declare #.igo::*optimize-fastest*
16+ (simple-string feature))
17+ (flet ((kw (s) (intern s :keyword))
18+ (kw-if-* (s) (if (string= s "*") (intern s :keyword) s)))
19+ (let ((fs (the list (igo::split "," feature))))
20+ (nconc (mapcar #'kw (subseq fs 0 6))
21+ (mapcar #'kw-if-* (subseq fs 6)))))))
22+
--- branches/cl-igo-queue/unknown.lisp (nonexistent)
+++ branches/cl-igo-queue/unknown.lisp (revision 67)
@@ -0,0 +1,56 @@
1+(defpackage :igo.unknown
2+ (:use :common-lisp :igo.char-category)
3+ (:nicknames :unk)
4+ (:shadow load
5+ search)
6+ (:export load
7+ unknown
8+ search))
9+(in-package :igo.unknown)
10+
11+;;;;;;;;;;
12+;;; struct
13+(defstruct (unknown (:conc-name ""))
14+ (categorys nil :type category-set)
15+ (space-id 0 :type fixnum))
16+
17+;;;;;;;;;;;;;;;;;;;;;
18+;;; external function
19+(defun load (root-dir)
20+ (let* ((cts (igo.char-category:load root-dir))
21+ (unk (make-unknown :categorys cts)))
22+ (setf (space-id unk)
23+ (category-trie-id (category (char-code #\Space) cts)))
24+ unk))
25+
26+(defun search (cs unk wdic result)
27+ (declare #.igo::*optimize-fastest*)
28+ (prog* ((start (code-stream:position cs))
29+ (code (code-stream:read cs))
30+ (categorys (categorys unk))
31+ (ct (category code categorys)))
32+ (unless (or (queue:empty? result) (category-invoke? ct))
33+ (go :end))
34+
35+ (let* ((trie-id (category-trie-id ct))
36+ (space? (= trie-id (space-id unk)))
37+ (limit (category-length ct)))
38+ (loop FOR len FROM 1 TO limit DO
39+ (setf result
40+ (dic:search-from-trie-id trie-id start (code-stream:position cs) space? result wdic))
41+ (when (or (code-stream:end? cs)
42+ (not (compatible? code (code-stream:read cs) categorys)))
43+ (go :end)))
44+
45+ (when (and (category-group? ct))
46+ (loop (when (code-stream:end? cs)
47+ (return))
48+ (unless (compatible? code (code-stream:read cs) categorys)
49+ (code-stream:unread cs)
50+ (return)))
51+ (setf result
52+ (dic:search-from-trie-id trie-id start (code-stream:position cs)
53+ space? result wdic))))
54+ :end
55+ (setf (code-stream:position cs) start))
56+ result)
\ No newline at end of file
--- branches/cl-igo-queue/COPYING (nonexistent)
+++ branches/cl-igo-queue/COPYING (revision 67)
@@ -0,0 +1,21 @@
1+The MIT License
2+
3+Copyright (c) 2010 Takeru Ohta <phjgt308@ybb.ne.jp>
4+
5+Permission is hereby granted, free of charge, to any person obtaining a copy
6+of this software and associated documentation files (the "Software"), to deal
7+in the Software without restriction, including without limitation the rights
8+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+copies of the Software, and to permit persons to whom the Software is
10+furnished to do so, subject to the following conditions:
11+
12+The above copyright notice and this permission notice shall be included in
13+all copies or substantial portions of the Software.
14+
15+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21+THE SOFTWARE.
--- branches/cl-igo-queue/trie.lisp (nonexistent)
+++ branches/cl-igo-queue/trie.lisp (revision 67)
@@ -0,0 +1,78 @@
1+(defpackage igo.trie
2+ (:use :common-lisp :igo.type)
3+ (:nicknames :trie)
4+ (:shadow load)
5+ (:export trie
6+ load
7+ each-common-prefix))
8+(in-package :igo.trie)
9+
10+;;;;;;;;;;;
11+;;; declaim
12+(declaim (inline id including-tail?))
13+
14+;;;;;;;;;;
15+;;; struct
16+(defstruct (trie (:conc-name ""))
17+ (element-count 0 :type fixnum)
18+ (begs #() :type (simple-array (signed-byte 32)))
19+ (lens #() :type (simple-array (signed-byte 16)))
20+ (base #() :type (simple-array (signed-byte 32)))
21+ (chck #() :type (simple-array (unsigned-byte 16)))
22+ (tail #() :type (simple-array (unsigned-byte 16))))
23+
24+;;;;;;;;;;;;;;;;;;;;;
25+;;; internal function
26+(defun id (node) (1- (- (the negative-fixnum node))))
27+
28+(defun including-tail? (cs node trie &aux (id (id node)) (tail (tail trie)))
29+ (loop REPEAT (aref (lens trie) id)
30+ FOR i fixnum FROM (aref (begs trie) id)
31+ ALWAYS (= (aref tail i) (code-stream:read cs))))
32+
33+(defmacro with-gensyms (vars &body body)
34+ `(let ,(mapcar (lambda (v) `(,v (gensym))) vars)
35+ ,@body))
36+
37+;;;;;;;;;;;;;;;;;;;;;
38+;;; external function
39+(defun load (path)
40+ (vbs:with-input-file (in path)
41+ (let ((node-size (vbs:read-byte in 4))
42+ (tind-size (vbs:read-byte in 4))
43+ (tail-size (vbs:read-byte in 4)))
44+ (make-trie
45+ :element-count tind-size
46+ :begs (vbs:read-sequence in 4 tind-size)
47+ :base (vbs:read-sequence in 4 node-size)
48+ :lens (vbs:read-sequence in 2 tind-size)
49+ :chck (vbs:read-sequence in 2 node-size :signed nil)
50+ :tail (vbs:read-sequence in 2 tail-size :signed nil)))))
51+
52+(defmacro each-common-prefix ((pos id cs trie) &body body)
53+ (with-gensyms (base chck node code idx loop-block)
54+ `(let* ((,base (base ,trie))
55+ (,chck (chck ,trie))
56+ (,node (aref ,base 0)))
57+ (declare (fixnum ,node))
58+ (block ,loop-block
59+ (loop FOR ,code = (code-stream:read ,cs) DO
60+ (unless (= ,code code-stream:+TERMINATE-CODE+)
61+ (let ((,idx (+ ,node code-stream:+TERMINATE-CODE+)))
62+ (when (= (aref ,chck ,idx) code-stream:+TERMINATE-CODE+)
63+ (let ((,pos (1- (code-stream:position ,cs)))
64+ (,id (id (aref ,base ,idx))))
65+ ,@body))))
66+
67+ (prog ((,idx (+ ,node ,code)))
68+ (when (= (aref ,chck ,idx) ,code)
69+ (setf ,node (aref ,base ,idx))
70+ (if (plusp ,node)
71+ (go :continue)
72+ (when (including-tail? ,cs ,node ,trie)
73+ (let ((,pos (code-stream:position ,cs))
74+ (,id (id ,node)))
75+ ,@body))))
76+ (return-from ,loop-block)
77+
78+ :continue))))))
\ No newline at end of file
--- branches/cl-igo-queue/code-stream.lisp (nonexistent)
+++ branches/cl-igo-queue/code-stream.lisp (revision 67)
@@ -0,0 +1,75 @@
1+(defpackage igo.code-stream
2+ (:use :common-lisp :igo.type)
3+ (:nicknames :code-stream)
4+ (:shadow read
5+ position)
6+ (:export read
7+ unread
8+ make
9+ end?
10+ position
11+ +TERMINATE-CODE+))
12+(in-package :igo.code-stream)
13+
14+;;;;;;;;;;;
15+;;; declaim
16+(declaim (inline end? code low-surrogate high-surrogate)
17+ (optimize (speed 3) (debug 0) (safety 0) (compilation-speed 0))
18+ (ftype (function (code-stream) utf16-code) read))
19+
20+;;;;;;;;;;
21+;;; struct
22+(defstruct (code-stream (:constructor make (source start &aux (position start)))
23+ (:conc-name ""))
24+ (source "" :type simple-string)
25+ (position 0 :type array-index)
26+ (surrogate? nil :type boolean))
27+
28+;;;;;;;;;;;;
29+;;; constant
30+(eval-when (:compile-toplevel :load-toplevel :execute)
31+ (defconstant +TERMINATE-CODE+ 0))
32+
33+;;;;;;;;;;;;;;;;;;;;;
34+;;; internal function
35+(defun code (code-stream)
36+ (char-code (char (source code-stream) (position code-stream))))
37+
38+;;;;;;;;;;;;;;;;;;;;;
39+;;; external function
40+(defun end? (code-stream)
41+ (= (position code-stream) (length (source code-stream))))
42+
43+(defun low-surrogate (code)
44+ (declare (character-code code))
45+ (+ #xDC00 (ldb (byte 10 0) code)))
46+
47+(defun high-surrogate (code)
48+ (declare (character-code code))
49+ (+ #xB800 (- (ldb (byte 11 10) code) #b1000000)))
50+
51+(defun read (code-stream)
52+ (declare (code-stream code-stream))
53+ (with-slots (position surrogate?) code-stream
54+ (cond (surrogate?
55+ (setf surrogate? nil)
56+ (prog1 (low-surrogate (code code-stream))
57+ (incf position)))
58+
59+ ((end? code-stream)
60+ +TERMINATE-CODE+)
61+
62+ (t
63+ (let ((code (code code-stream)))
64+ (if (> code #xFFFF)
65+ (progn (setf surrogate? t)
66+ (high-surrogate code))
67+ (progn (incf position)
68+ code)))))))
69+
70+(defun unread (code-stream)
71+ (declare (code-stream code-stream))
72+ (with-slots (position surrogate?) code-stream
73+ (if surrogate?
74+ (setf surrogate? nil)
75+ (decf position))))
--- branches/cl-igo-queue/varied-byte-stream.lisp (nonexistent)
+++ branches/cl-igo-queue/varied-byte-stream.lisp (revision 67)
@@ -0,0 +1,42 @@
1+(defpackage igo.varied-byte-stream
2+ (:use :common-lisp :igo.type)
3+ (:nicknames :vbs)
4+ (:shadow read-byte
5+ read-sequence)
6+ (:export with-input-file
7+ read-byte
8+ read-sequence
9+ file-size))
10+(in-package :igo.varied-byte-stream)
11+
12+;;;;;;;;;;
13+;;; struct
14+(defstruct varied-byte-stream
15+ (source nil :type file-stream)
16+ (offset 0 :type fixnum))
17+
18+;;;;;;;;;;;;;;;;;;;;;
19+;;; external function
20+(defmacro with-input-file ((stream filespec) &body body)
21+ `(with-open-file (,stream ,filespec)
22+ (let ((,stream (make-varied-byte-stream :source ,stream)))
23+ ,@body)))
24+
25+(defun file-size (vbs)
26+ (file-length (varied-byte-stream-source vbs)))
27+
28+(defun read-byte (varied-byte-stream byte-size &key (signed t))
29+ (with-slots (source offset) varied-byte-stream
30+ (with-open-file (in source :element-type `(n-byte ,byte-size ,signed))
31+ (file-position in (/ offset byte-size))
32+ (prog1 (common-lisp:read-byte in)
33+ (incf offset byte-size)))))
34+
35+(defun read-sequence (varied-byte-stream byte-size count &key (signed t))
36+ (with-slots (source offset) varied-byte-stream
37+ (with-open-file (in source :element-type `(n-byte ,byte-size ,signed))
38+ (file-position in (/ offset byte-size))
39+ (let ((buf (make-array count :element-type `(n-byte ,byte-size ,signed))))
40+ (common-lisp:read-sequence buf in)
41+ (incf offset (* byte-size count))
42+ buf))))
\ No newline at end of file
--- branches/cl-igo-queue/matrix.lisp (nonexistent)
+++ branches/cl-igo-queue/matrix.lisp (revision 67)
@@ -0,0 +1,35 @@
1+(defpackage igo.matrix
2+ (:use :common-lisp)
3+ (:nicknames :mtx)
4+ (:shadow load)
5+ (:export load
6+ link-cost
7+ matrix))
8+(in-package :igo.matrix)
9+
10+;;;;;;;;;;;
11+;;; declaim
12+(declaim (inline link-cost))
13+
14+;;;;;;;;
15+;;; struct
16+(defstruct (matrix (:conc-name ""))
17+ (matrix #() :type (simple-array (signed-byte 16)))
18+ (left-size 0 :type (unsigned-byte 16))
19+ (right-size 0 :type (unsigned-byte 16)))
20+
21+;;;;;;;;;;;;;;;;;;;;;
22+;;; external function
23+(defun load (data-dir)
24+ (vbs:with-input-file (in (merge-pathnames "matrix.bin" data-dir))
25+ (let ((left-size (vbs:read-byte in 4))
26+ (right-size (vbs:read-byte in 4)))
27+ (make-matrix :left-size left-size
28+ :right-size right-size
29+ :matrix (vbs:read-sequence in 2 (* left-size right-size))))))
30+
31+(defun link-cost (left-id right-id matrix)
32+ (declare ((unsigned-byte 16) left-id right-id))
33+ (the (signed-byte 16)
34+ (aref (matrix matrix)
35+ (+ (* (right-size matrix) right-id) left-id))))
\ No newline at end of file
--- branches/cl-igo-queue/word-dic.lisp (nonexistent)
+++ branches/cl-igo-queue/word-dic.lisp (revision 67)
@@ -0,0 +1,90 @@
1+(defpackage igo.word-dic
2+ (:use :common-lisp)
3+ (:nicknames :dic)
4+ (:shadow load
5+ search)
6+ (:export load
7+ word-dic
8+ word-data
9+ word-cost
10+ search
11+ search-from-trie-id))
12+(in-package :igo.word-dic)
13+
14+;;;;;;;;;;;
15+;;; declaim
16+(declaim (inline word-data word-cost left-id right-id))
17+
18+;;;;;;;;;;
19+;;; struct
20+(defstruct word-dic
21+ (trie nil :type trie:trie)
22+ (costs #() :type (simple-array (signed-byte 16)))
23+ (left-ids #() :type (simple-array (signed-byte 16)))
24+ (right-ids #() :type (simple-array (signed-byte 16)))
25+ (data #() :type (simple-array t))
26+ (indices #() :type (simple-array (signed-byte 32))))
27+
28+;;;;;;;;;;;;;;;;;;;;;
29+;;; internal function
30+(defun read-indices (path)
31+ (vbs:with-input-file (in path)
32+ (vbs:read-sequence in 4 (/ (vbs:file-size in) 4))))
33+
34+(defun read-data (path)
35+ ;; TODO: check surrogate pair
36+ (vbs:with-input-file (in path)
37+ (map 'string #'code-char (vbs:read-sequence in 2 (/ (vbs:file-size in) 2) :signed nil))))
38+
39+(defun split-data (data offsets feature-parser)
40+ (declare #.igo::*optimize-fastest*
41+ ((simple-array (signed-byte 32)) offsets)
42+ (simple-string data)
43+ (function feature-parser))
44+ (let ((ary (make-array (1- (length offsets)))))
45+ (dotimes (i (length ary) ary)
46+ (setf (aref ary i)
47+ (funcall feature-parser (subseq data (aref offsets i) (aref offsets (1+ i))))))))
48+
49+(defun left-id (word-id wdic) (aref (word-dic-left-ids wdic) word-id))
50+(defun right-id (word-id wdic) (aref (word-dic-right-ids wdic) word-id))
51+
52+;;;;;;;;;;;;;;;;;;;;;
53+;;; external function
54+(defun load (root-dir &optional (feature-parser #'identity))
55+ (flet ((fullpath (name) (merge-pathnames root-dir name)))
56+ (vbs:with-input-file (in (fullpath "word.inf"))
57+ (let* ((word-count (/ (vbs:file-size in) (+ 4 2 2 2)))
58+ (data (read-data (fullpath "word.dat")))
59+ (offsets (vbs:read-sequence in 4 word-count)))
60+ (make-word-dic
61+ :trie (trie:load (fullpath "word2id"))
62+ :indices (read-indices (fullpath "word.ary.idx"))
63+ :data (split-data data offsets feature-parser)
64+
65+ :left-ids (vbs:read-sequence in 2 word-count)
66+ :right-ids (vbs:read-sequence in 2 word-count)
67+ :costs (vbs:read-sequence in 2 word-count))))))
68+
69+(defun search (cs result wdic)
70+ (declare #.igo::*optimize-fastest*)
71+ (let ((start (code-stream:position cs))
72+ (indices (word-dic-indices wdic))
73+ (trie (word-dic-trie wdic)))
74+ (trie:each-common-prefix (end id cs trie)
75+ (loop FOR i fixnum FROM (aref indices id) BELOW (aref indices (1+ id)) DO
76+ (queue:push (vn:make i start end (left-id i wdic) (right-id i wdic) nil)
77+ result)))
78+ (setf (code-stream:position cs) start))
79+ result)
80+
81+(defun search-from-trie-id (id start end space? result wdic)
82+ (declare #.igo::*optimize-fastest*)
83+ (let ((indices (word-dic-indices wdic)))
84+ (loop FOR i fixnum FROM (aref indices id) BELOW (aref indices (1+ id)) DO
85+ (queue:push (vn:make i start end (left-id i wdic) (right-id i wdic) space?)
86+ result)))
87+ result)
88+
89+(defun word-cost (word-id wdic) (aref (word-dic-costs wdic) word-id))
90+(defun word-data (word-id wdic) (aref (word-dic-data wdic) word-id))
\ No newline at end of file
--- branches/cl-igo-queue/delete-nicknames.lisp (nonexistent)
+++ branches/cl-igo-queue/delete-nicknames.lisp (revision 67)
@@ -0,0 +1,5 @@
1+#|
2+(delete-package-nickname :igo.varied-byte-stream)
3+(delete-package-nickname :igo.code-stream)
4+(delete-package-nickname :igo.trie)
5+|#
\ No newline at end of file
旧リポジトリブラウザで表示