Rewrite RegexpPreview::MultiLine using Fluentd's parser plugin

Signed-off-by: Kenji Okimoto <okimoto@clear-code.com>
This commit is contained in:
Kenji Okimoto 2018-06-14 15:33:52 +09:00
parent b96a6ed95f
commit 81be7792bd
No known key found for this signature in database
GPG Key ID: F9E3E329A5C5E4A1
2 changed files with 83 additions and 98 deletions

View File

@ -1,80 +1,70 @@
module RegexpPreview
class MultiLine
attr_reader :file, :format, :params
attr_reader :path, :plugin_name, :plugin_config, :plugin
def initialize(file, format, params = {})
@file = file
@format = format
@params = params[:params]
# Sets up a preview for the log file at +path+ using a Fluentd parser plugin.
#
# @param path [String] path of the log file to preview
# @param plugin_name [String] name handed to Fluent::Plugin.new_parser
# @param plugin_config [Hash] attributes for the parser's config element
def initialize(path, plugin_name, plugin_config = {})
  @path, @plugin_name, @plugin_config = path, plugin_name, plugin_config

  # Wrap the raw attributes in a "ROOT" config element with no child
  # elements, then create the parser and configure it with that element.
  root_element = Fluent::Config::Element.new("ROOT", "", @plugin_config, [])
  parser = Fluent::Plugin.new_parser(@plugin_name)
  parser.configure(root_element)
  @plugin = parser
end
def matches_json
# Returns the preview payload for this file and parser configuration.
#
# The stale `params:` entry and the first `matches:` entry were dropped:
# the latter called `matches` from inside `matches` (unbounded recursion)
# and was overwritten by the later duplicate key anyway.
#
# @return [Hash] `:pluginConfig` is the plugin configuration given to the
#   constructor, `:matches` is the result of `_matches`
def matches
  {
    pluginConfig: @plugin_config,
    matches: _matches,
  }
end
private
# Splits the tail of `file` into chunks, one per `format_firstline` match,
# and collects the non-nil results of `detect_chunk` for each chunk.
#
# @return [Array] results of `detect_chunk`; empty when `patterns` is empty
def matches
  return [] if patterns.empty?

  # Block form of File.open: the handle is closed as soon as the tail has
  # been read (the previous version never closed it).
  whole_string = File.open(file) do |io|
    tail_lines = FileReverseReader.new(io).tail(Settings.in_tail_preview_line_count)
    tail_lines.map { |line| line << "\n" }.join
  end

  re_firstline = Regexp.new(params[:format_firstline])

  # Start offset of every first-line match. Advancing by one character
  # (rather than past the match) keeps the original scanning behaviour.
  indexes = []
  cur = 0
  while (first_index = whole_string.index(re_firstline, cur))
    indexes << first_index
    cur = first_index + 1
  end

  result = []
  indexes.each_with_index do |index, i|
    # With the exclusive range, -1 makes the final chunk stop just before
    # the last character of the text (the newline appended above).
    next_index = indexes[i + 1] || -1
    ret = detect_chunk(whole_string[index...next_index])
    result << ret if ret
  end
  result
end
def detect_chunk(chunk)
whole = ""
matches = []
offset = 0
patterns.each do |pat|
match = chunk.match(pat)
return nil unless match
offset = chunk.index(pat)
return nil if offset > 0
chunk = chunk[match[0].length..-1]
match.names.each_with_index do |name, index|
matches << {
key: name,
matched: match[name],
pos: match.offset(index + 1).map{|pos| pos + whole.length},
}
# Parses the tail of the file at `path` with the configured parser plugin.
#
# The tail is split into chunks, each starting where the
# `format_firstline` pattern matches. Every chunk is handed to
# `@plugin.parse`, and each value of the yielded record is located inside
# the chunk.
#
# @return [Array<Hash>] one `{ whole:, matches: }` Hash per chunk that
#   produced at least one match; `[]` when no first-line pattern is set
# @raise [SystemCallError] when the file cannot be opened (e.g. Errno::ENOENT)
def _matches
  firstline_regex = firstline_regexp
  return [] unless firstline_regex

  # Block form of File.open: the handle is always closed, and a failing
  # open surfaces its own error instead of a NoMethodError from calling
  # #close on nil in an ensure clause.
  whole_string = File.open(path) do |io|
    tail_lines = FileReverseReader.new(io).tail(Settings.in_tail_preview_line_count)
    tail_lines.map { |line| "#{line}\n" }.join
  end

  parsed_chunks = split_chunks(whole_string, firstline_regex).map do |chunk|
    { whole: chunk, matches: chunk_matches(chunk) }
  end
  parsed_chunks.reject { |parsed| parsed[:matches].empty? }
end

# Builds the first-line Regexp from plugin_config["format_firstline"],
# stripping the surrounding slashes only when both are present.
# Returns nil when the pattern is missing or empty.
def firstline_regexp
  source = plugin_config["format_firstline"].to_s
  source = source[1..-2] if source.start_with?("/") && source.end_with?("/")
  return nil if source.empty?

  Regexp.new(source)
end

# Cuts whole_string into chunks that each begin at a first-line match.
def split_chunks(whole_string, firstline_regex)
  # Advancing by one character (not past the match) keeps the original
  # scanning behaviour.
  indexes = []
  cur = 0
  while (first_index = whole_string.index(firstline_regex, cur))
    indexes << first_index
    cur = first_index + 1
  end

  indexes.each_with_index.map do |index, i|
    # With the exclusive range, -1 makes the final chunk stop just before
    # the last character of the text (the newline appended to the last line).
    next_index = indexes[i + 1] || -1
    whole_string[index...next_index]
  end
end

# Parses one chunk and returns `{ key:, matched:, pos: }` for every record
# value that can be located in the chunk, searching left to right.
def chunk_matches(chunk)
  found = []
  @plugin.parse(chunk) do |_time, record|
    next unless record

    last_pos = 0
    record.each do |key, value|
      text = value.to_s
      start = chunk.index(text, last_pos)
      # A value the parser converted or rewrote may not occur verbatim in
      # the chunk; it has no position to report, so it is skipped.
      next unless start

      # String#index counts characters, so the end offset must too
      # (bytesize would overshoot on multibyte text).
      finish = start + text.length
      last_pos = finish
      found << { key: key, matched: value, pos: [start, finish] }
    end
  end
  found
end
# Compiled multi-line Regexps for the non-blank "format1".."format20"
# entries of `params`, in numeric order. Memoized in @patterns.
def patterns
  return @patterns if @patterns

  @patterns = 1.upto(20).each_with_object([]) do |number, compiled|
    source = params["format#{number}"].presence
    compiled << Regexp.new(source, Regexp::MULTILINE) if source
  end
end
end
end

View File

@ -1,33 +1,29 @@
require 'spec_helper'
describe RegexpPreview::MultiLine do
describe "#matches_json" do
subject { parser.matches_json }
let(:parser) { RegexpPreview::MultiLine.new(target_path, "multiline", params) }
describe "#matches" do
subject { parser.matches }
let(:parser) { RegexpPreview::MultiLine.new(target_path, "multiline", plugin_config) }
describe "simple usage" do
let(:target_path) { File.expand_path("./spec/support/fixtures/error0.log", Rails.root) }
let :params do
params = {
format_firstline: "foo",
time_format: "time_format",
let :plugin_config do
plugin_config = {
"format_firstline" => "/foo/",
"time_format" => "time_format",
}
params["format1"] = "(?<foo>foo)\n"
params["format2"] = "(?<bar>bar)"
plugin_config["format1"] = "/(?<foo>foo)\n/"
plugin_config["format2"] = "/(?<bar>bar)/"
3.upto(Fluentd::Setting::InTail::MULTI_LINE_MAX_FORMAT_COUNT) do |i|
params["format#{i}"] = ""
plugin_config["format#{i}"] = "//"
end
{ params: params }
end
it 'should not have regexp and time_format in [:params][:setting]' do
expect(subject[:params][:setting]).to eq(regexp: nil, time_format: nil)
plugin_config
end
it "should include matches info" do
matches_info = {
whole: "foo\nbar",
whole: "foo\nbar\nbaz\n1\n2\n3\n4\n5\n6\n10\n11\n12",
matches: [
{
key: "foo", matched: "foo", pos: [0, 3]
@ -37,24 +33,23 @@ describe RegexpPreview::MultiLine do
}
]
}
expect(subject[:matches]).to include matches_info
end
end
describe "detect only continuos patterns" do
let(:target_path) { File.expand_path("./spec/support/fixtures/error0.log", Rails.root) }
let(:params) do
params = {
format_firstline: "foo",
time_format: "time_format",
let(:plugin_config) do
plugin_config = {
"format_firstline" => "/foo/",
"time_format" => "time_format",
}
params["format1"] = "(?<foo>foo)\n"
params["format2"] = "(?<bar>baz)"
plugin_config["format1"] = "/(?<foo>foo)\n/"
plugin_config["format2"] = "/(?<bar>baz)/"
3.upto(Fluentd::Setting::InTail::MULTI_LINE_MAX_FORMAT_COUNT) do |i|
params["format#{i}"] = ""
plugin_config["format#{i}"] = "//"
end
{ params: params }
plugin_config
end
it "shouldn't match" do
@ -66,21 +61,21 @@ describe RegexpPreview::MultiLine do
# http://docs.fluentd.org/articles/in_tail
let(:target_path) { File.expand_path("./spec/support/fixtures/multiline_example.log", Rails.root) }
let :params do
params = {
format_firstline: "\\d{4}-\\d{1,2}-\\d{1,2}",
"format1" => "^(?<time>\\d{4}-\\d{1,2}-\\d{1,2} \\d{1,2}:\\d{1,2}:\\d{1,2}) \\[(?<thread>.*)\\] (?<level>[^\\s]+)(?<message>.*)",
time_format: "time_format",
let :plugin_config do
plugin_config = {
"format_firstline" => "/\\d{4}-\\d{1,2}-\\d{1,2}/",
"format1" => "/^(?<time>\\d{4}-\\d{1,2}-\\d{1,2} \\d{1,2}:\\d{1,2}:\\d{1,2}) \\[(?<thread>.*)\\] (?<level>[^\\s]+)(?<message>.*)/",
"time_format" => "%Y-%m-%d %H:%M:%S",
"keep_time_key" => true
}
2.upto(Fluentd::Setting::InTail::MULTI_LINE_MAX_FORMAT_COUNT) do |i|
params["format#{i}"] = ""
plugin_config["format#{i}"] = "//"
end
{ params: params }
plugin_config
end
it "should include matches info" do
matches_info =
[
matches_info = [
{
whole: "2013-3-03 14:27:33 [main] INFO Main - Start\n",
matches: [