Rewrite RegexpPreview::MultiLine using Fluentd's parser plugin

Signed-off-by: Kenji Okimoto <okimoto@clear-code.com>
This commit is contained in:
Kenji Okimoto 2018-06-14 15:33:52 +09:00
parent b96a6ed95f
commit 81be7792bd
No known key found for this signature in database
GPG Key ID: F9E3E329A5C5E4A1
2 changed files with 83 additions and 98 deletions

View File

@ -1,80 +1,70 @@
module RegexpPreview module RegexpPreview
class MultiLine class MultiLine
attr_reader :file, :format, :params attr_reader :path, :plugin_name, :plugin_config, :plugin
def initialize(file, format, params = {}) def initialize(path, plugin_name, plugin_config = {})
@file = file @path = path
@format = format @plugin_name = plugin_name
@params = params[:params] @plugin_config = plugin_config
config = Fluent::Config::Element.new("ROOT", "", @plugin_config, [])
@plugin = Fluent::Plugin.new_parser(@plugin_name).tap do |instance|
instance.configure(config)
end
end end
def matches_json def matches
{ {
params: { pluginConfig: @plugin_config,
setting: { # for vue.js matches: _matches,
regexp: nil,
time_format: nil,
}
},
matches: matches.compact,
} }
end end
private private
def matches def _matches
return [] if patterns.empty? begin
reader = FileReverseReader.new(File.open(file)) io = File.open(path)
result = [] reader = FileReverseReader.new(io)
target_lines = reader.tail(Settings.in_tail_preview_line_count).map{|line| line << "\n" } parserd_chunks = []
whole_string = target_lines.join target_lines = reader.tail(Settings.in_tail_preview_line_count).map{|line| line << "\n" }
re_firstline = Regexp.new(params[:format_firstline]) whole_string = target_lines.join
indexes = [] firstline_regex = Regexp.new(plugin_config["format_firstline"][1..-2])
cur = 0 indexes = []
while first_index = whole_string.index(re_firstline, cur) cur = 0
indexes << first_index while first_index = whole_string.index(firstline_regex, cur)
cur = first_index + 1 indexes << first_index
end cur = first_index + 1
indexes.each_with_index do |index, i|
next_index = indexes[i + 1] || -1
chunk = whole_string[index...next_index]
ret = detect_chunk(chunk)
next unless ret
result << ret
end
result
end
def detect_chunk(chunk)
whole = ""
matches = []
offset = 0
patterns.each do |pat|
match = chunk.match(pat)
return nil unless match
offset = chunk.index(pat)
return nil if offset > 0
chunk = chunk[match[0].length..-1]
match.names.each_with_index do |name, index|
matches << {
key: name,
matched: match[name],
pos: match.offset(index + 1).map{|pos| pos + whole.length},
}
end end
whole << match[0] indexes.each_with_index do |index, i|
next_index = indexes[i + 1] || -1
chunk = whole_string[index...next_index]
parsed = {
whole: chunk,
matches: []
}
@plugin.parse(chunk) do |time, record|
next unless record
last_pos = 0
record.each do |key, value|
start = chunk.index(value, last_pos)
finish = start + value.bytesize
last_pos = finish
parsed[:matches] << {
key: key,
matched: value,
pos: [start, finish]
}
end
end
parserd_chunks << parsed
end
parserd_chunks.reject do |parsed|
parsed[:matches].blank?
end
ensure
io.close
end end
{
whole: whole,
matches: matches,
}
end
def patterns
@patterns ||= (1..20).map do |n|
params["format#{n}"].presence
end.compact.map {|pattern| Regexp.new(pattern, Regexp::MULTILINE)}
end end
end end
end end

View File

@ -1,33 +1,29 @@
require 'spec_helper' require 'spec_helper'
describe RegexpPreview::MultiLine do describe RegexpPreview::MultiLine do
describe "#matches_json" do describe "#matches" do
subject { parser.matches_json } subject { parser.matches }
let(:parser) { RegexpPreview::MultiLine.new(target_path, "multiline", params) } let(:parser) { RegexpPreview::MultiLine.new(target_path, "multiline", plugin_config) }
describe "simple usage" do describe "simple usage" do
let(:target_path) { File.expand_path("./spec/support/fixtures/error0.log", Rails.root) } let(:target_path) { File.expand_path("./spec/support/fixtures/error0.log", Rails.root) }
let :params do let :plugin_config do
params = { plugin_config = {
format_firstline: "foo", "format_firstline" => "/foo/",
time_format: "time_format", "time_format" => "time_format",
} }
params["format1"] = "(?<foo>foo)\n" plugin_config["format1"] = "/(?<foo>foo)\n/"
params["format2"] = "(?<bar>bar)" plugin_config["format2"] = "/(?<bar>bar)/"
3.upto(Fluentd::Setting::InTail::MULTI_LINE_MAX_FORMAT_COUNT) do |i| 3.upto(Fluentd::Setting::InTail::MULTI_LINE_MAX_FORMAT_COUNT) do |i|
params["format#{i}"] = "" plugin_config["format#{i}"] = "//"
end end
{ params: params } plugin_config
end
it 'should not have regexp and time_format in [:params][:setting]' do
expect(subject[:params][:setting]).to eq(regexp: nil, time_format: nil)
end end
it "should include matches info" do it "should include matches info" do
matches_info = { matches_info = {
whole: "foo\nbar", whole: "foo\nbar\nbaz\n1\n2\n3\n4\n5\n6\n10\n11\n12",
matches: [ matches: [
{ {
key: "foo", matched: "foo", pos: [0, 3] key: "foo", matched: "foo", pos: [0, 3]
@ -37,24 +33,23 @@ describe RegexpPreview::MultiLine do
} }
] ]
} }
expect(subject[:matches]).to include matches_info expect(subject[:matches]).to include matches_info
end end
end end
describe "detect only continuos patterns" do describe "detect only continuos patterns" do
let(:target_path) { File.expand_path("./spec/support/fixtures/error0.log", Rails.root) } let(:target_path) { File.expand_path("./spec/support/fixtures/error0.log", Rails.root) }
let(:params) do let(:plugin_config) do
params = { plugin_config = {
format_firstline: "foo", "format_firstline" => "/foo/",
time_format: "time_format", "time_format" => "time_format",
} }
params["format1"] = "(?<foo>foo)\n" plugin_config["format1"] = "/(?<foo>foo)\n/"
params["format2"] = "(?<bar>baz)" plugin_config["format2"] = "/(?<bar>baz)/"
3.upto(Fluentd::Setting::InTail::MULTI_LINE_MAX_FORMAT_COUNT) do |i| 3.upto(Fluentd::Setting::InTail::MULTI_LINE_MAX_FORMAT_COUNT) do |i|
params["format#{i}"] = "" plugin_config["format#{i}"] = "//"
end end
{ params: params } plugin_config
end end
it "shouldn't match" do it "shouldn't match" do
@ -66,21 +61,21 @@ describe RegexpPreview::MultiLine do
# http://docs.fluentd.org/articles/in_tail # http://docs.fluentd.org/articles/in_tail
let(:target_path) { File.expand_path("./spec/support/fixtures/multiline_example.log", Rails.root) } let(:target_path) { File.expand_path("./spec/support/fixtures/multiline_example.log", Rails.root) }
let :params do let :plugin_config do
params = { plugin_config = {
format_firstline: "\\d{4}-\\d{1,2}-\\d{1,2}", "format_firstline" => "/\\d{4}-\\d{1,2}-\\d{1,2}/",
"format1" => "^(?<time>\\d{4}-\\d{1,2}-\\d{1,2} \\d{1,2}:\\d{1,2}:\\d{1,2}) \\[(?<thread>.*)\\] (?<level>[^\\s]+)(?<message>.*)", "format1" => "/^(?<time>\\d{4}-\\d{1,2}-\\d{1,2} \\d{1,2}:\\d{1,2}:\\d{1,2}) \\[(?<thread>.*)\\] (?<level>[^\\s]+)(?<message>.*)/",
time_format: "time_format", "time_format" => "%Y-%m-%d %H:%M:%S",
"keep_time_key" => true
} }
2.upto(Fluentd::Setting::InTail::MULTI_LINE_MAX_FORMAT_COUNT) do |i| 2.upto(Fluentd::Setting::InTail::MULTI_LINE_MAX_FORMAT_COUNT) do |i|
params["format#{i}"] = "" plugin_config["format#{i}"] = "//"
end end
{ params: params } plugin_config
end end
it "should include matches info" do it "should include matches info" do
matches_info = matches_info = [
[
{ {
whole: "2013-3-03 14:27:33 [main] INFO Main - Start\n", whole: "2013-3-03 14:27:33 [main] INFO Main - Start\n",
matches: [ matches: [