Skip to content

Commit 2e4d781

Browse files
committed
text processing experiments with an external morpho analyser. Beginning.
1 parent d7acd2a commit 2e4d781

1 file changed

Lines changed: 45 additions & 0 deletions

File tree

samples/text_tagger.rb

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
# 'Ruby script' node type for morphological analysis of text.
2+
# It uses the mystem utility by Yandex.
3+
4+
require 'open3'
5+
require 'json'
6+
7+
# Runs the external `mystem` morphological analyser on +in_str+ and collects
# the leading grammar tag for every lemma it reports.
#
# The analyser is started as `./mystem -gin --format json`, so the binary has
# to be reachable relative to the current working directory. Every non-blank
# output line is parsed as a JSON object; each element of its 'analysis' array
# that carries a 'gr' value contributes one entry to the result.
#
# @param in_str [String] text to analyse
# @return [Hash{String => String}] lemma ('lex') mapped to the first run of
#   word characters found in its 'gr' string; a later analysis of the same
#   lemma overwrites an earlier one
# @raise [JSON::ParserError] if a non-blank output line is not valid JSON
def mystem(in_str)
  text = in_str.to_s
  # The text used to be written with `puts`, which guarantees a trailing
  # newline; keep feeding the analyser newline-terminated input.
  text += "\n" unless text.end_with?("\n")

  # capture3 writes stdin and drains stdout/stderr concurrently. Waiting for
  # the child to exit *before* reading its stdout blocks forever as soon as the
  # output no longer fits into the pipe buffer.
  # stderr and the exit status are deliberately ignored (best effort).
  out, _err, _status = Open3.capture3('./mystem -gin --format json', stdin_data: text)

  result = {}
  out.force_encoding('utf-8').each_line do |line|
    next if line.strip.empty?

    # Entries without an 'analysis' key contribute nothing.
    analyses = JSON.parse(line)['analysis'] || []
    analyses.each do |term|
      gr = term['gr']
      next unless gr

      # Keep only the first word-character run of the grammar string.
      result[term['lex']] = gr.partition(/\w+/)[1]
    end
  end

  result
end
37+
38+
# Script body: run each incoming row's document text through mystem, pass the
# row on unchanged, and report progress as an integer percentage.
# NOTE(review): $in_data_0, $out_data_0 and setProgress are not defined in this
# file - presumably provided by the host 'Ruby script' node; confirm.
total_rows = $in_data_0.length
$in_data_0.each_with_index do |record, index|
  # The analysis result is computed but not used yet.
  tagged_terms = mystem(record.document.to_string)

  $out_data_0 << record

  percent_done = index * 100 / total_rows
  setProgress("#{percent_done}%")
end

0 commit comments

Comments
 (0)