-
Notifications
You must be signed in to change notification settings - Fork 16
/
Copy pathparse-log.ls
193 lines (168 loc) · 5.64 KB
/
parse-log.ls
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
require! {cheerio, optimist, fs, request, zhutil}
# Command-line arguments. `_` is iterated at the bottom of the file as the
# list of input files to read; `id` is extracted here but not referenced
# anywhere else in this file.
{id, _} = optimist.argv
# Characters that may make up the Chinese numeral in front of a numbered item.
zhnumber = <[○ 一 二 三 四 五 六 七 八 九 十]>
# Matches "<numeral>、<rest of line>": group 1 is the numeral, group 2 the rest.
# The `m` flag lets the pattern anchor at any line start within the text.
zhreg = new RegExp "^((?:#{ zhnumber.join '|' })+)、(.*)$", 'm'
# Collects the header lines that come before the first line with a speaker.
#
# Fields written into @meta:
#   ad      - appointed dates (屆別)
#   session - session (會期)
#   sitting - sitting (會次)
#   speaker - the text following the 主席 label
#   raw     - every header line consumed, verbatim
class Meta
    ->
        @meta = {raw: []}

    # Consume one header line.
    # Returns this while the header continues. Returns nothing as soon as a
    # line carries a speaker; the caller's `ctx .= push-line ...` then leaves
    # the header context.
    push-line: (speaker, text) ->
        return if speaker
        if @ctx is \speaker
            # After the 主席 line every further header line is kept verbatim.
            # (A position/name split used to be attempted here; its result was
            # never used and the destructuring threw when the pattern did not
            # match multi-line text.)
            @meta.raw.push text
            return @

        match text
        | /立法院第(\d+)屆第(\d+)會期第(\d+)次會議紀錄/ =>
            @meta<[ad session sitting]> = that[1 to 3]
        | /主\s*席\s+(.*)$/ =>
            @ctx = \speaker
            @meta.speaker = that.1
        @meta.raw.push text
        return @
# Collects the numbered items of the section opened by the 報告事項 heading.
class Announcement
    ->
        @items = {}
        @last-item = null

    # A line shaped "<Chinese numeral>、<subject>" opens a new item keyed by
    # the parsed number; every other line is appended, as [speaker, text], to
    # the conversation of the item opened most recently.
    # Returns this.
    push-line: (speaker, text) ->
        if [_, item, content]? = text.match zhreg
            item = zhutil.parseZHNumber item
            @last-item = @items[item] = {subject: content, conversation: []}
        else
            # No numbered item has been opened yet: park the line under key 0
            # with no subject instead of dereferencing a null @last-item.
            unless @last-item?
                @last-item = @items.0 = {subject: null, conversation: []}
            @last-item.conversation.push [speaker, text]
        return @

    # Returns the tagged pair stored in the log.
    serialize: ->
        [\Announcement, @items]
# Plain transcript of a proposal being handled: an ordered list of
# [speaker, text] pairs.
class Proposal
    ->
        @lines = []

    # Rich nodes are not interpreted yet: a marker goes to stderr and the
    # node's HTML is kept as a speaker-less line.
    push-rich: (node) ->
        console.error 'NOTYET'
        @lines.push [null, node.html!]

    # Append one line and hand the context back to the caller.
    push-line: (speaker, text) ->
        entry = [speaker, text]
        @lines.push entry
        this

    # Returns the tagged pair stored in the log.
    serialize: ->
        ['Proposal', @lines]
# Collected exactly like a Proposal; only the tag in the serialized pair differs.
class Reconsideration extends Proposal
    serialize: ->
        tag = 'Reconsideration'
        [tag, @lines]
# Collects the section opened by the 質詢事項 heading: first the numbered
# reply / question entries, then the exchange that follows them.
class Questioning
    ->
        @ctx = ''
        @reply = {}
        @question = {}
        @current-conversation = []
        @conversation = []
        @subsection = false
        @document = false

    # Move the lines gathered so far into @conversation.
    # Until the first flush has happened they are appended flat; afterwards
    # each batch is wrapped as [type, lines], type being 'exmotion',
    # 'interpdoc' or 'interp'.
    flush: ->
        type = switch
        | @exmotion => 'exmotion'
        | @document => 'interpdoc'
        else 'interp'
        if @current-conversation.length
            if @subsection
                @conversation.push [ type, @current-conversation ]
            else
                @conversation = @conversation ++ @current-conversation
        @current-conversation = []
        @exmotion = false
        @subsection = true

    # Record one line of the exchange.
    push-conversation: (speaker, text) ->
        if (speaker ? @lastSpeaker) is \主席 and text is /報告院會|詢答時間|已質詢完畢|處理完畢|提書面質詢/
            # The chair moving on closes the batch in progress.
            @flush!
            @conversation.push [speaker, text]
            @document = text is /提書面質詢/
        else if !speaker? && @current-conversation.length is 0
            @conversation.push [speaker, text] # meeting actions
        else
            # Optional leading timestamp such as "(10時5分)", in ASCII or
            # full-width parentheses. The full-width forms are written as
            # escapes so the pattern has exactly three capture groups
            # (hour, minute, remaining text) matching the destructuring.
            [_, h, m, text]? = text.match /^(?:\(|\uFF08)(\d+)時(\d+)分(?:\)|\uFF09)(.*)$/
            entry = [speaker, text]
            entry.push [+h, +m] if h?
            @current-conversation.push entry
        if speaker is \主席 and text is /現在.*處理臨時提案/
            @exmotion = true
        @lastSpeaker = speaker if speaker

    # Record a rich node as one speaker-less line of HTML, with the SRC
    # attribute of every image inside it blanked.
    push-rich: (node) ->
        rich = $ '<div/>' .append node
        node.find('img').each -> @.attr \SRC, ''
        @push-conversation null, rich.html!

    # Route a line while the numbered entries are being read. The first line
    # that is not filed as a numbered entry switches to the exchange for good.
    push: (speaker, text) ->
        return @push-conversation speaker, text if @in-conversation
        # A numbered entry can only be filed once a reply/question heading has
        # selected the target table; before that, lines are treated as exchange.
        if @ctx
            if [_, item, content]? = text.match zhreg
                item = zhutil.parseZHNumber item
                if @ctx is \question
                    # "本院…,<question>": split the asker from the question
                    # when the entry has that shape (ASCII or full-width
                    # comma); otherwise keep the entry whole instead of
                    # destructuring a null match.
                    [_, speaker, content]? = content.match /^(本院.*?)[,\uFF0C](.*)$/
                text = content
        if item
            @[@ctx][item] = [speaker, text]
        else
            @in-conversation = true
            @push-conversation speaker, text

    # Section sub-headings select which table numbered entries go to;
    # everything else is routed through push. Returns this.
    push-line: (speaker, text) ->
        match text
        | /行政院答復部分$/ => @ctx = \reply
        | /本院委員質詢部分$/ => @ctx = \question
        | otherwise => @push speaker, text
        return @

    # Flushes pending lines and returns the tagged pair stored in the log.
    serialize: ->
        @flush!
        [\Interpellation, {answers: @reply, questions: @question, interpellation: @conversation}]
# Parser state shared by store, newContext and parse.
# `ctx` is the section currently receiving lines; it starts out as the
# header collector, which is also kept in `meta` for the final output.
meta = new Meta
ctx = meta
announcement = new Announcement
questioning = new Questioning
proposal = new Proposal
# Serialized sections in document order, plus bare [speaker, text] lines
# seen while no section is open.
log = []
# Append the serialized form of the current section to the log.
# Skipped when there is no current section or when it cannot serialize
# itself: the header collector (Meta) defines no serialize method, and its
# data is emitted separately as `meta.meta`.
store = ->
    log.push ctx.serialize! if typeof ctx?.serialize is \function
# Close out the section being collected, then start collecting into a fresh
# instance of the given section class.
newContext = (ctxType) ->
    store!
    ctx := new ctxType!
# Most recent labelled speaker; lets an unlabelled line still be recognised
# as coming from the chair.
lastSpeaker = null

# Walk one document node (passed as `this`) and feed it to the current section.
#   div / center - recurse into the children
#   table        - hand the node to the section's push-rich
#   p            - classify the paragraph text (see below)
#   anything else is reported on stderr
parse = ->
    switch @.0.name
    | \div    => @.children!each parse
    | \center => @.children!each parse
    | \table  =>
        # ctx is null after adjournment and not every section class defines
        # push-rich; report such tables instead of throwing.
        if typeof ctx?.push-rich is \function
            ctx.push-rich @
        else
            console.error 'unhandled:', @.0.name, @.html!
    | \p      =>
        # Trim both ends completely; a leftover trailing blank would defeat
        # the `$`-anchored heading patterns below.
        text = $(@)text! - /^\s+|\s+$/g
        return unless text.length
        # "<label>:<content>" with a 2-10 character label; the colon may be
        # ASCII or full-width (U+FF1A).
        [full, speaker, content]? = text.match /^([^:\uFF1A]{2,10})[:\uFF1A](.*)$/
        if speaker
            if speaker is /以下/
                # a label containing 以下 is ordinary text, not a speaker
                text = full
                speaker = null
            else
                text = content

        # Adjournment closes the open section; later lines go straight to the log.
        if text is /報告院會/ and text is /現在散會/
            store!
            ctx := null

        if text is /^報\s+告\s+事\s+項$/
            newContext Announcement
        else if text is /^質\s+詢\s+事\s+項$/
            newContext Questioning
        else if (speaker ? lastSpeaker) is \主席 && text is /處理.*黨團.*提案/
            newContext Proposal
        else if (speaker ? lastSpeaker) is \主席 && text is /處理.*復議案/
            newContext Reconsideration
        else
            if ctx
                # push-line returns the context to keep using (or nothing,
                # which ends the context)
                ctx .= push-line speaker, text
            else
                log.push [speaker, text]
        lastSpeaker := speaker if speaker
    # the label is now a separate argument; it used to be applied to the
    # node name for lack of a comma
    | otherwise => console.error 'unhandled:', @.0.name, @.html!
# Replace every U+E58E (a private-use code point) in the raw markup with 冲.
fixup = (markup) ->
    markup.replace /\uE58E/g, '冲'
# Parse every input file into the shared log, then print the result.
# NOTE(review): the indentation of this file was lost; the nesting below
# (only the four per-file steps inside the loop) is assumed — confirm.
for file in _
    data = fs.readFileSync file, \utf8
    # swap the private-use code point for the real character before parsing
    data = fixup data
    # `$` is the handle parse and Questioning.push-rich use to wrap nodes
    $ = cheerio.load data, { +lowerCaseTags }
    $('body').children!each parse
# flush whichever section is still open at end of input
store!
# emit the header metadata and the collected log as JSON, indented by 4
console.log JSON.stringify { meta.meta, log }, null, 4