From 106c2c1de73e8d366fc52ddf584f9dcd2e28c6f9 Mon Sep 17 00:00:00 2001 From: Lev Rechnik Date: Mon, 26 Mar 2012 16:26:46 +0400 Subject: [PATCH 01/20] =?UTF-8?q?Add=20rule=20"=D1=81=D1=85=20->=20skh"=20?= =?UTF-8?q?to=20the=20transliteration=20scheme?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- lib/russian/transliteration.rb | 2 ++ spec/transliteration_spec.rb | 1 + 2 files changed, 3 insertions(+) diff --git a/lib/russian/transliteration.rb b/lib/russian/transliteration.rb index 27f89af..f8a7479 100644 --- a/lib/russian/transliteration.rb +++ b/lib/russian/transliteration.rb @@ -24,6 +24,7 @@ module Transliteration LOWER_MULTI = { "ье"=>"ie", "ьё"=>"ie", + "сх"=>"skh", } UPPER_SINGLE = { @@ -39,6 +40,7 @@ module Transliteration UPPER_MULTI = { "ЬЕ"=>"IE", "ЬЁ"=>"IE", + "СХ"=>"SKH", } LOWER = (LOWER_SINGLE.merge(LOWER_MULTI)).freeze diff --git a/spec/transliteration_spec.rb b/spec/transliteration_spec.rb index a4f256b..0027eeb 100644 --- a/spec/transliteration_spec.rb +++ b/spec/transliteration_spec.rb @@ -26,6 +26,7 @@ def t(str) t("ш").should == "sh" t("Ш").should == "SH" t("ц").should == "ts" + t("схема").should == "skhema" end it "should properly transliterate mixed russian-english strings" do From 0b5aaa4f83951cb4dfa1bdaaceba03e3dafa9e8b Mon Sep 17 00:00:00 2001 From: Lev Rechnik Date: Mon, 26 Mar 2012 16:35:35 +0400 Subject: [PATCH 02/20] Add simple detransliteration --- lib/russian.rb | 12 ++++- lib/russian/transliteration.rb | 80 ++++++++++++++++++++++++++++++---- spec/transliteration_spec.rb | 26 ++++++++++- 3 files changed, 108 insertions(+), 10 deletions(-) diff --git a/lib/russian.rb b/lib/russian.rb index a3d70b4..72b3ee1 100644 --- a/lib/russian.rb +++ b/lib/russian.rb @@ -76,7 +76,17 @@ def transliterate(str) Russian::Transliteration.transliterate(str) end alias :translit :transliterate - + + # De-transliteration for russian language + # + # Usage: + # Russian.detranslit("rubin") + # 
Russian.detransliterate("rubin") + def detransliterate(str) + Russian::Transliteration.detransliterate(str) + end + alias :detranslit :detransliterate + protected # Returns all locale files shipped with library def locale_files diff --git a/lib/russian/transliteration.rb b/lib/russian/transliteration.rb index f8a7479..dff33ba 100644 --- a/lib/russian/transliteration.rb +++ b/lib/russian/transliteration.rb @@ -43,28 +43,92 @@ module Transliteration "СХ"=>"SKH", } + REVERSE_LOWER_SINGLE = { + "a"=>"а","b"=>"б","c"=>"с","d"=>"д", + "e"=>"е","f"=>"ф","g"=>"г","h"=>"х", + "i"=>"и","j"=>"й","k"=>"к","l"=>"л", + "m"=>"м","n"=>"н","o"=>"о","p"=>"п", + "q"=>"к","r"=>"р","s"=>"с","t"=>"т", + "u"=>"у","v"=>"в","w"=>"у","x"=>"кс", + "y"=>"ы","z"=>"з", + } + REVERSE_LOWER_MULTI = { + "sch"=>"щ","skh"=>"сх", + "aya"=>"ая","yaya"=>"яя","oyа"=>"оя","uyа"=>"уя","yyа"=>"ыя","eyа"=>"ея", + "aye"=>"ае","oye"=>"ое","uye"=>"уе","yye"=>"ые","eye"=>"ее", + "oye"=>"ое","oyo"=>"оё", + "ayu"=>"аю","uyu"=>"ую","oyu"=>"ою","uyu"=>"ую","eyu"=>"ею", + "yu"=>"ю","ya"=>"я","yo"=>"ё", + "ju"=>"ю","ja"=>"я","jo"=>"ё", + "yi"=>"ї","ji"=>"ї", + "ay"=>"ай","yay"=>"яй","oy"=>"ой","yoy"=>"ёй","uy"=>"уй","yuy"=>"юй","yy"=>"ый","iy"=>"ий","ey"=>"ей", + "ay"=>"ай","yay"=>"яй","oy"=>"ой","yoy"=>"ёй","uy"=>"уй","yuy"=>"юй","yy"=>"ый","iy"=>"ий","ey"=>"ей", + "ch"=>"ч","zh"=>"ж","sh"=>"ш","ts"=>"ц", + } + + REVERSE_UPPER_SINGLE = { + "A"=>"А","B"=>"Б","C"=>"С","D"=>"Д", + "E"=>"Е","F"=>"Ф","G"=>"Г","H"=>"Х", + "I"=>"И","J"=>"Й","K"=>"К","L"=>"Л", + "M"=>"М","N"=>"Н","O"=>"О","P"=>"П", + "Q"=>"К","R"=>"Р","S"=>"С","T"=>"Т", + "U"=>"У","V"=>"В","W"=>"У","X"=>"КС", + "Y"=>"Ы","Z"=>"З","'"=>"Ъ", + } + REVERSE_UPPER_MULTI = { + "SCH"=>"Щ","SKH"=>"СХ", + "AYA"=>"АЯ","YAYA"=>"ЯЯ","OYА"=>"ОЯ","UYА"=>"УЯ","YYА"=>"ЫЯ","EYА"=>"ЕЯ", + "AYE"=>"АЕ","OYE"=>"ОЕ","UYE"=>"УЕ","YYE"=>"ЫЕ","EYE"=>"ЕЕ", + "OYE"=>"ОЕ","OYO"=>"ОЁ", + "AYU"=>"АЮ","UYU"=>"УЮ","OYU"=>"ОЮ","UYU"=>"УЮ","EYU"=>"ЕЮ", + "YU"=>"Ю","YA"=>"Я","YO"=>"Ё", + 
"JU"=>"Ю","JA"=>"Я","JO"=>"Ё", + "YI"=>"Ї","JI"=>"Ї", + "AY"=>"АЙ","YAY"=>"ЯЙ","OY"=>"ОЙ","YOY"=>"ЁЙ","UY"=>"УЙ","YUY"=>"ЮЙ","YY"=>"ЫЙ","IY"=>"ИЙ","EY"=>"ЕЙ", + "AY"=>"АЙ","YAY"=>"ЯЙ","OY"=>"ОЙ","YOY"=>"ЁЙ","UY"=>"УЙ","YUY"=>"ЮЙ","YY"=>"ЫЙ","IY"=>"ИЙ","EY"=>"ЕЙ", + "CH"=>"Ч","ZH"=>"Ж","SH"=>"Ш","TS"=>"Ц", + } + LOWER = (LOWER_SINGLE.merge(LOWER_MULTI)).freeze UPPER = (UPPER_SINGLE.merge(UPPER_MULTI)).freeze MULTI_KEYS = (LOWER_MULTI.merge(UPPER_MULTI)).keys.sort_by {|s| s.length}.reverse.freeze + MULTI_KEYS_PATTERN = MULTI_KEYS.join('|').freeze + + REVERSE_LOWER = (REVERSE_LOWER_SINGLE.merge(REVERSE_LOWER_MULTI)).freeze + REVERSE_UPPER = (REVERSE_UPPER_SINGLE.merge(REVERSE_UPPER_MULTI)).freeze + REVERSE_MULTI_KEYS = (REVERSE_LOWER_MULTI.merge(REVERSE_UPPER_MULTI)).keys.sort_by {|s| s.length}.reverse.freeze + REVERSE_MULTI_KEYS_PATTERN = REVERSE_MULTI_KEYS.join('|').freeze # Transliterate a string with russian characters # # Возвращает строку, в которой все буквы русского алфавита заменены на похожую по звучанию латиницу def transliterate(str) - chars = str.scan(%r{#{MULTI_KEYS.join '|'}|\w|.}) + convert(str, UPPER, LOWER, MULTI_KEYS_PATTERN) + end + + # De-transliterate a latin string into cyrillic characters + # + # Возвращает строку, в которой все буквы латинского алфавита заменены на похожие по звучанию из кириллицы + def detransliterate(str) + convert(str, REVERSE_UPPER, REVERSE_LOWER, REVERSE_MULTI_KEYS_PATTERN) + end + + def convert(str, upper, lower, multi_pattern) + chars = str.scan(%r{#{multi_pattern}|\w|.}) result = "" chars.each_with_index do |char, index| - if UPPER.has_key?(char) && LOWER.has_key?(chars[index+1]) + result << \ + if upper.has_key?(char) && lower.has_key?(chars[index+1]) # combined case - result << UPPER[char].downcase.capitalize - elsif UPPER.has_key?(char) - result << UPPER[char] - elsif LOWER.has_key?(char) - result << LOWER[char] + upper[char].downcase.capitalize + elsif upper.has_key?(char) + upper[char] + elsif lower.has_key?(char) 
+ lower[char] else - result << char + char end end diff --git a/spec/transliteration_spec.rb b/spec/transliteration_spec.rb index 0027eeb..47ce979 100644 --- a/spec/transliteration_spec.rb +++ b/spec/transliteration_spec.rb @@ -8,6 +8,10 @@ def t(str) Russian::transliterate(str) end + def dt(str) + Russian::Transliteration::detransliterate(str) + end + %w(transliterate translit).each do |method| it "'#{method}' method should perform transliteration" do str = mock(:str) @@ -16,6 +20,14 @@ def t(str) end end + %w(detransliterate detranslit).each do |method| + it "'#{method}' method should perform de-transliteration" do + str = mock(:str) + Russian::Transliteration.should_receive(:detransliterate).with(str) + Russian.send(method, str) + end + end + # These tests are from rutils, . it "should transliterate properly" do @@ -28,7 +40,19 @@ def t(str) t("ц").should == "ts" t("схема").should == "skhema" end - + + it "should de-transliterate properly" do + dt("Eto prosto nekiy tekst").should == "Ето просто некий текст" + dt("sch").should == "щ" + dt("Zveryo moyo").should == "Зверё моё" + dt("mayskiy izmenyaya moey pamyatyu tvoeyu pohodkoyu vyshla iz maya ob'ektoy matyoy").should == "майский изменяя моей памятю твоею походкою вышла из мая обЪектой матёй" + dt("IE explorer").should == "ИЕ експлорер" + dt("upuscheniy").should == "упущений" + dt("sh").should == "ш" + dt("TS").should == "Ц" + dt("skhema").should == "схема" + end + it "should properly transliterate mixed russian-english strings" do t("Это кусок строки русских букв v peremeshku s latinizey i амперсандом (pozor!) & something").should == "Eto kusok stroki russkih bukv v peremeshku s latinizey i ampersandom (pozor!) 
& something" From db5b1fcd60cf209f541f390c4717a236659272c2 Mon Sep 17 00:00:00 2001 From: Lev Rechnik Date: Mon, 26 Mar 2012 16:39:11 +0400 Subject: [PATCH 03/20] Remove trailing whitespace --- lib/russian.rb | 20 ++++++------- .../action_view_ext/helpers/date_helper.rb | 20 ++++++------- .../active_model_ext/custom_error_message.rb | 16 +++++------ lib/russian/locale/datetime.rb | 2 +- lib/russian/locale/pluralization.rb | 8 +++--- lib/russian/locale/transliterator.rb | 2 +- lib/russian/russian_rails.rb | 2 +- lib/russian/transliteration.rb | 6 ++-- lib/russian/version.rb | 2 +- spec/i18n/locale/datetime_spec.rb | 18 ++++++------ spec/i18n/locale/pluralization_spec.rb | 6 ++-- spec/russian_spec.rb | 28 +++++++++---------- spec/spec_helper.rb | 2 +- spec/transliteration_spec.rb | 10 +++---- 14 files changed, 71 insertions(+), 71 deletions(-) diff --git a/lib/russian.rb b/lib/russian.rb index 72b3ee1..97438a7 100644 --- a/lib/russian.rb +++ b/lib/russian.rb @@ -1,4 +1,4 @@ -# -*- encoding: utf-8 -*- +# -*- encoding: utf-8 -*- $KCODE = 'u' if RUBY_VERSION < "1.9" @@ -9,9 +9,9 @@ module Russian extend self - + autoload :Transliteration, 'transliteration' - + # Russian locale LOCALE = :'ru' @@ -25,7 +25,7 @@ def locale LOCALIZE_MONTH_NAMES_MATCH = /(%d|%e)(.*)(%B)/ LOCALIZE_STANDALONE_ABBR_DAY_NAMES_MATCH = /^%a/ LOCALIZE_STANDALONE_DAY_NAMES_MATCH = /^%A/ - + # Init Russian i18n: load all translations shipped with library. 
def init_i18n I18n::Backend::Simple.send(:include, I18n::Backend::Pluralization) @@ -39,23 +39,23 @@ def init_i18n # See I18n::translate def translate(key, options = {}) I18n.translate(key, options.merge({ :locale => LOCALE })) - end + end alias :t :translate - + # See I18n::localize def localize(object, options = {}) I18n.localize(object, options.merge({ :locale => LOCALE })) end alias :l :localize - + # strftime() proxy with Russian localization def strftime(object, format = :default) localize(object, { :format => format }) end - + # Simple pluralization proxy # - # Usage: + # Usage: # Russian.pluralize(1, "вещь", "вещи", "вещей") # Russian.pluralize(3.14, "вещь", "вещи", "вещей", "вещи") def pluralize(n, *variants) @@ -92,7 +92,7 @@ def detransliterate(str) def locale_files Dir[File.join(File.dirname(__FILE__), "russian", "locale", "**/*")] end - + # Converts an array of pluralization variants to a Hash that can be used # with I18n pluralization. def pluralization_variants_to_hash(*variants) diff --git a/lib/russian/action_view_ext/helpers/date_helper.rb b/lib/russian/action_view_ext/helpers/date_helper.rb index 9a1b598..4ff05a8 100644 --- a/lib/russian/action_view_ext/helpers/date_helper.rb +++ b/lib/russian/action_view_ext/helpers/date_helper.rb @@ -1,20 +1,20 @@ -# -*- encoding: utf-8 -*- +# -*- encoding: utf-8 -*- -# Заменяет хелпер Rails select_month и метод translated_month_names +# Заменяет хелпер Rails select_month и метод translated_month_names # для поддержки функционала "отдельностоящих имен месяцев". # # Теперь можно использовать и полные, и сокращенные название месяцев в двух вариантах -- контекстном # (по умолчанию) и отдельностоящем (если в текущем языке есть соответствующие переводы). -# Теперь хелперы поддерживают ключ :use_standalone_month_names, хелпер select_month +# Теперь хелперы поддерживают ключ :use_standalone_month_names, хелпер select_month # устанавливает его по умолчанию. 
# Отдельностоящие имена месяцев также используютс когда указан ключ :discard_day. # # # Replaces Rails select_month helper and translated_month_names private method to provide -# "standalone month names" feature. +# "standalone month names" feature. # # It is now possible to use both abbreviated and full month names in two variants (if current locale provides them). -# All date helpers support :use_standalone_month_names key now, select_month helper sets +# All date helpers support :use_standalone_month_names key now, select_month helper sets # it to true by default. # Standalone month names are also used when :discard_day key is provided. if defined?(ActionView::Helpers::DateTimeSelector) @@ -27,7 +27,7 @@ module DateHelper # instead of names -- set the :use_month_numbers key in +options+ to true for this to happen. If you # want both numbers and names, set the :add_month_numbers key in +options+ to true. If you would prefer # to show month names as abbreviations, set the :use_short_month key in +options+ to true. If you want - # to use your own month names, set the :use_month_names key in +options+ to an array of 12 month names. + # to use your own month names, set the :use_month_names key in +options+ to an array of 12 month names. # You can also choose if you want to use i18n standalone month names or default month names -- you can # force standalone month names usage by using :use_standalone_month_names key. # Override the field name using the :field_name option, 'month' by default. @@ -66,7 +66,7 @@ def select_month(date, options = {}, html_options = {}) DateTimeSelector.new(date, options.merge(:use_standalone_month_names => true), html_options).select_month end end - + class DateTimeSelector #:nodoc: private # Returns translated month names @@ -108,11 +108,11 @@ def translated_month_names key = :'date.month_names' end end - + I18n.translate(key, :locale => @options[:locale]) end - + end end end -end # if defined? 
\ No newline at end of file +end # if defined? diff --git a/lib/russian/active_model_ext/custom_error_message.rb b/lib/russian/active_model_ext/custom_error_message.rb index 4698409..02c7cf7 100644 --- a/lib/russian/active_model_ext/custom_error_message.rb +++ b/lib/russian/active_model_ext/custom_error_message.rb @@ -1,4 +1,4 @@ -# -*- encoding: utf-8 -*- +# -*- encoding: utf-8 -*- if defined?(ActiveModel::Errors) module ActiveModel @@ -13,19 +13,19 @@ class Errors # теперь не имеют префикса с названием атрибута если в сообщении об ошибке первым символом указан "^". # # Так, например, - # + # # validates_acceptance_of :accepted_terms, :message => 'нужно принять соглашение' - # + # # даст сообщение - # + # # Accepted terms нужно принять соглашение - # + # # однако, - # + # # validates_acceptance_of :accepted_terms, :message => '^Нужно принять соглашение' - # + # # даст сообщение - # + # # Нужно принять соглашение # # diff --git a/lib/russian/locale/datetime.rb b/lib/russian/locale/datetime.rb index a4e62e3..b763a32 100644 --- a/lib/russian/locale/datetime.rb +++ b/lib/russian/locale/datetime.rb @@ -1,4 +1,4 @@ -# -*- encoding: utf-8 -*- +# -*- encoding: utf-8 -*- # Context-based month name and day name switching for Russian # diff --git a/lib/russian/locale/pluralization.rb b/lib/russian/locale/pluralization.rb index 20c2a38..7248936 100644 --- a/lib/russian/locale/pluralization.rb +++ b/lib/russian/locale/pluralization.rb @@ -1,4 +1,4 @@ -# -*- encoding: utf-8 -*- +# -*- encoding: utf-8 -*- # Правило плюрализации для русского языка, взято из CLDR, http://unicode.org/cldr/ # @@ -19,10 +19,10 @@ :ru => { :'i18n' => { :plural => { - :rule => lambda { |n| - n % 10 == 1 && n % 100 != 11 ? :one : [2, 3, 4].include?(n % 10) && ![12, 13, 14].include?(n % 100) ? :few : n % 10 == 0 || [5, 6, 7, 8, 9].include?(n % 10) || [11, 12, 13, 14].include?(n % 100) ? :many : :other + :rule => lambda { |n| + n % 10 == 1 && n % 100 != 11 ? 
:one : [2, 3, 4].include?(n % 10) && ![12, 13, 14].include?(n % 100) ? :few : n % 10 == 0 || [5, 6, 7, 8, 9].include?(n % 10) || [11, 12, 13, 14].include?(n % 100) ? :many : :other } } } } -} \ No newline at end of file +} diff --git a/lib/russian/locale/transliterator.rb b/lib/russian/locale/transliterator.rb index 1ef2280..14a6266 100644 --- a/lib/russian/locale/transliterator.rb +++ b/lib/russian/locale/transliterator.rb @@ -1,4 +1,4 @@ -# -*- encoding: utf-8 -*- +# -*- encoding: utf-8 -*- # I18n transliteration delegates to Russian::Transliteration (we're unable # to use common I18n transliteration tables with Russian) diff --git a/lib/russian/russian_rails.rb b/lib/russian/russian_rails.rb index f036547..bc317b6 100644 --- a/lib/russian/russian_rails.rb +++ b/lib/russian/russian_rails.rb @@ -4,5 +4,5 @@ end if defined?(ActionView::Helpers) - require 'action_view_ext/helpers/date_helper' + require 'action_view_ext/helpers/date_helper' end diff --git a/lib/russian/transliteration.rb b/lib/russian/transliteration.rb index dff33ba..9008646 100644 --- a/lib/russian/transliteration.rb +++ b/lib/russian/transliteration.rb @@ -1,7 +1,7 @@ -# -*- encoding: utf-8 -*- +# -*- encoding: utf-8 -*- module Russian - # Russian transliteration + # Russian transliteration # # Транслитерация для букв русского алфавита module Transliteration @@ -135,4 +135,4 @@ def convert(str, upper, lower, multi_pattern) result end end -end \ No newline at end of file +end diff --git a/lib/russian/version.rb b/lib/russian/version.rb index 716fd76..997d64c 100644 --- a/lib/russian/version.rb +++ b/lib/russian/version.rb @@ -6,4 +6,4 @@ module VERSION STRING = [MAJOR, MINOR, TINY].join('.') end -end \ No newline at end of file +end diff --git a/spec/i18n/locale/datetime_spec.rb b/spec/i18n/locale/datetime_spec.rb index ee2a505..5cc4e45 100644 --- a/spec/i18n/locale/datetime_spec.rb +++ b/spec/i18n/locale/datetime_spec.rb @@ -1,4 +1,4 @@ -# -*- encoding: utf-8 -*- +# -*- encoding: utf-8 -*- require 
File.dirname(__FILE__) + '/../../spec_helper' @@ -8,7 +8,7 @@ @date = Date.parse("1985-12-01") @time = Time.local(1985, 12, 01, 16, 05) end - + describe "with date formats" do it "should use default format" do l(@date).should == "01.12.1985" @@ -22,7 +22,7 @@ l(@date, :format => :long).should == "01 декабря 1985" end end - + describe "with date day names" do it "should use day names" do l(@date, :format => "%d %B (%A)").should == "01 декабря (воскресенье)" @@ -33,13 +33,13 @@ l(@date, :format => "%A").should == "Воскресенье" l(@date, :format => "%A, %d %B").should == "Воскресенье, 01 декабря" end - + it "should use abbreviated day names" do l(@date, :format => "%a").should == "Вс" l(@date, :format => "%a, %d %b %Y").should == "Вс, 01 дек. 1985" end end - + describe "with month names" do it "should use month names" do l(@date, :format => "%d %B").should == "01 декабря" @@ -48,12 +48,12 @@ l(@date, :format => "%e %B %Y").should == " 1 декабря 1985" l(@date, :format => "А было тогда %eе число %B %Y").should == "А было тогда 1е число декабря 1985" end - + it "should use standalone month names" do l(@date, :format => "%B").should == "Декабрь" l(@date, :format => "%B %Y").should == "Декабрь 1985" end - + it "should use abbreviated month names" do @date = Date.parse("1985-03-01") l(@date, :format => "%d %b").should == "01 марта" @@ -61,7 +61,7 @@ l(@date, :format => "%d %b").should == "01 марта" l(@date, :format => "%e %b %Y").should == " 1 марта 1985" end - + it "should use standalone abbreviated month names" do @date = Date.parse("1985-03-01") l(@date, :format => "%b").should == "март" @@ -85,7 +85,7 @@ it "should use long format" do l(@time, :format => :long).should == "01 декабря 1985, 16:05" end - + it "should define am and pm" do I18n.backend.translate(Russian.locale, :"time.am").should_not be_nil I18n.backend.translate(Russian.locale, :"time.pm").should_not be_nil diff --git a/spec/i18n/locale/pluralization_spec.rb b/spec/i18n/locale/pluralization_spec.rb index 
da15865..15a9636 100644 --- a/spec/i18n/locale/pluralization_spec.rb +++ b/spec/i18n/locale/pluralization_spec.rb @@ -1,4 +1,4 @@ -# -*- encoding: utf-8 -*- +# -*- encoding: utf-8 -*- require File.dirname(__FILE__) + '/../../spec_helper' @@ -10,7 +10,7 @@ end @backend = I18n.backend end - + it "should pluralize correctly" do @backend.send(:pluralize, :'ru', @hash, 1).should == 'one' @backend.send(:pluralize, :'ru', @hash, 2).should == 'few' @@ -25,4 +25,4 @@ @backend.send(:pluralize, :'ru', @hash, 2.31).should == 'other' @backend.send(:pluralize, :'ru', @hash, 3.31).should == 'other' end -end \ No newline at end of file +end diff --git a/spec/russian_spec.rb b/spec/russian_spec.rb index 6226bf3..81e3349 100644 --- a/spec/russian_spec.rb +++ b/spec/russian_spec.rb @@ -1,4 +1,4 @@ -# -*- encoding: utf-8 -*- +# -*- encoding: utf-8 -*- require File.dirname(__FILE__) + '/spec_helper' @@ -12,38 +12,38 @@ Russian.locale.should == Russian::LOCALE end end - + describe "during i18n initialization" do after(:each) do I18n.load_path = [] Russian.init_i18n end - + it "should keep existing translations while switching backends" do I18n.load_path << File.join(File.dirname(__FILE__), 'fixtures', 'en.yml') Russian.init_i18n I18n.t(:foo, :locale => :'en').should == "bar" end - + it "should keep existing :ru translations while switching backends" do I18n.load_path << File.join(File.dirname(__FILE__), 'fixtures', 'ru.yml') Russian.init_i18n I18n.t(:'date.formats.default', :locale => :'ru').should == "override" end - + it "should NOT set default locale to Russian locale" do locale = I18n.default_locale Russian.init_i18n I18n.default_locale.should == locale end end - + describe "with localize proxy" do before(:each) do @time = mock(:time) @options = { :format => "%d %B %Y" } end - + %w(l localize).each do |method| it "'#{method}' should call I18n backend localize" do I18n.should_receive(:localize).with(@time, @options.merge({ :locale => Russian.locale })) @@ -51,7 +51,7 @@ end end end - 
+ describe "with translate proxy" do before(:all) do @object = :bar @@ -65,7 +65,7 @@ end end end - + describe "strftime" do before(:each) do @time = mock(:time) @@ -76,18 +76,18 @@ Russian.should_receive(:localize).with(@time, { :format => format }) Russian.strftime(@time, format) end - + it "should call localize with object and default format when format is not specified" do Russian.should_receive(:localize).with(@time, { :format => :default }) Russian.strftime(@time) end end - + describe "with pluralization" do %w(p pluralize).each do |method| it "'#{method}' should pluralize with variants given" do variants = %w(вещь вещи вещей вещи) - + Russian.send(method, 1, *variants).should == "вещь" Russian.send(method, 2, *variants).should == 'вещи' Russian.send(method, 3, *variants).should == 'вещи' @@ -99,12 +99,12 @@ Russian.send(method, 131, *variants).should == 'вещь' Russian.send(method, 3.14, *variants).should == 'вещи' end - + it "should raise an exception when first parameter is not a number" do lambda { Russian.send(method, nil, "вещь", "вещи", "вещей") }.should raise_error(ArgumentError) lambda { Russian.send(method, "вещь", "вещь", "вещи", "вещей") }.should raise_error(ArgumentError) end - + it "should raise an exception when there are not enough variants" do lambda { Russian.send(method, 1) }.should raise_error(ArgumentError) lambda { Russian.send(method, 1, "вещь") }.should raise_error(ArgumentError) diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index b533437..4d1a1a4 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -1,4 +1,4 @@ -# -*- encoding: utf-8 -*- +# -*- encoding: utf-8 -*- $TESTING=true $:.unshift File.join(File.dirname(__FILE__), '..', 'lib') diff --git a/spec/transliteration_spec.rb b/spec/transliteration_spec.rb index 47ce979..363f6a9 100644 --- a/spec/transliteration_spec.rb +++ b/spec/transliteration_spec.rb @@ -1,4 +1,4 @@ -# -*- encoding: utf-8 -*- +# -*- encoding: utf-8 -*- require File.dirname(__FILE__) + 
'/spec_helper' @@ -54,10 +54,10 @@ def dt(str) end it "should properly transliterate mixed russian-english strings" do - t("Это кусок строки русских букв v peremeshku s latinizey i амперсандом (pozor!) & something").should == - "Eto kusok stroki russkih bukv v peremeshku s latinizey i ampersandom (pozor!) & something" + t("Это кусок строки русских букв v peremeshku s latinizey i амперсандом (pozor!) & something").should == + "Eto kusok stroki russkih bukv v peremeshku s latinizey i ampersandom (pozor!) & something" end - + it "should properly transliterate mixed case chars in a string" do t("НЕВЕРОЯТНОЕ УПУЩЕНИЕ").should == "NEVEROYATNOE UPUSCHENIE" t("Невероятное Упущение").should == "Neveroyatnoe Upuschenie" @@ -73,4 +73,4 @@ def dt(str) t("АЛЯБЬЕВ").should == "ALYABIEV" end end -end \ No newline at end of file +end From b48d14e6ae066ddd69b217e5e0c143251f6432a6 Mon Sep 17 00:00:00 2001 From: Lev Rechnik Date: Wed, 28 Mar 2012 00:13:14 +0400 Subject: [PATCH 04/20] Add metaphone code generation for english words and metaphone-like code for russian words --- lib/russian.rb | 9 +++++ lib/russian/metaphone.rb | 86 ++++++++++++++++++++++++++++++++++++++++ spec/metaphone_spec.rb | 36 +++++++++++++++++ 3 files changed, 131 insertions(+) create mode 100644 lib/russian/metaphone.rb create mode 100644 spec/metaphone_spec.rb diff --git a/lib/russian.rb b/lib/russian.rb index 97438a7..a4fd3f6 100644 --- a/lib/russian.rb +++ b/lib/russian.rb @@ -11,6 +11,7 @@ module Russian extend self autoload :Transliteration, 'transliteration' + autoload :Metaphone, 'metaphone' # Russian locale LOCALE = :'ru' @@ -87,6 +88,14 @@ def detransliterate(str) end alias :detranslit :detransliterate + # Metaphone code for russian language + # + # Usage: + # Russian.metaphone("рубин") + def metaphone(str) + Russian::Metaphone.generate(str) + end + protected # Returns all locale files shipped with library def locale_files diff --git a/lib/russian/metaphone.rb b/lib/russian/metaphone.rb new file mode 
100644 index 0000000..d8ed504 --- /dev/null +++ b/lib/russian/metaphone.rb @@ -0,0 +1,86 @@ +# -*- encoding: utf-8 -*- + +module Russian + # Metaphone code generation for english words and metaphon-like code for russian titles + # + # Генерация метафон-кодов для английский слов и русских названий + # (русская версия заточена и будет дальше дорабатываться именно + # в эту сторону - названия и заголовки) + module Metaphone + extend self + + require "active_support/core_ext/string" + + TRANSFORMATIONS_EN = [ + [/\A[gkp]n/ , 'n'], # gn, kn, or pn at the start turns into 'n' + [/\Ax/ , 's'], # x at the start turns into 's' + [/\Awh/ , 'w'], # wh at the start turns into 'w' + [/mb\z/ , 'm'], # mb at the end turns into 'm' + [/sch/ , 'sk'], # sch sounds like 'sk' + [/x/ , 'ks'], + [/cia/ , 'xia'], # the 'c' -cia- and -ch- sounds like 'x' + [/ch/ , 'xh'], + [/c([iey])/ , 's\1'], # the 'c' -ce-, -ci-, or -cy- sounds like 's' + [/ck/ , 'k'], + [/c/ , 'k'], + [/dg([eiy])/ , 'j\1'], # the 'dg' in -dge-, -dgi-, or -dgy- sounds like 'j' + [/d/ , 't'], + [/gh/ , ''], + [/gned/ , 'ned'], + [/gn((?![aeiou])|(\z))/ , 'n'], + [/g[eiy]/ , 'j'], + [/ph/ , 'f'], + [/[aeiou]h(?![aeoiu])/ , '\1'], # 'h' is silent after a vowel unless it's between vowels + [/q/ , 'k'], + [/s(h|(ia)|(io))/ , 'x\1'], + [/t((ia)|(io))/ , 'x\1'], + [/th/ , '0'], + [/v/ , 'f'], + [/w(?![aeiou])/ , ''], + [/y(?![aeiou])/ , ''], + [/z/ , 's'] + ] + + # english metaphone code was inspired by + # author: AndyV http://snippets.dzone.com/user/AndyV + # source: http://snippets.dzone.com/posts/show/4112 + def generate_en(aWord) + word = aWord.downcase + TRANSFORMATIONS_EN.each { |transform| word.gsub!(transform[0], transform[1]) } + word.squeeze + return (word[0].chr + word[1..word.length-1].gsub(/[aeiou]/, '')).upcase + end + + TRANSFORMATIONS_RU = [ + [ /[дт]с/,'ц' ], # seems this improves matching + [ /[аяоёуюыиэе]/, '' ], # remove vowels + [ /[йъь]/, '' ], # these also ; this also removes all adjactive endings 
(-ый, -ая, -ое, ...) +# [ /[лмн]/, 'л' ], # seems this improves matching +# [ /ч/, 'ц' ], # this also + [ /сч/,'ш' ], # this also + [ /б/, 'п' ], # map pair letters + [ /в/, 'ф' ], + [ /г/, 'к' ], + [ /д/, 'т' ], + [ /ж/, 'ш' ], + [ /з/, 'с' ], + [ /щ/, 'ш' ], + ] + + def generate_ru(aWord) + word = aWord.mb_chars.downcase.squeeze + TRANSFORMATIONS_RU.each { |transform| word.gsub!(transform[0], transform[1]) } + word.squeeze + end + + # Generates a metaphone code for the given string + # + # Генерирует метафон-код заданной строки (в т.ч. русской, в т.ч. рус+eng) + def generate(str) + str.split(/\s+/).map do |s| + s =~ /[A-Za-z]/ ? generate_en(s =~ /[^A-Za-z]/ ? Russian.translit(s) : s) : generate_ru(s) + end .join(' ').gsub(/\s+/,' ').mb_chars.upcase + end + + end +end diff --git a/spec/metaphone_spec.rb b/spec/metaphone_spec.rb new file mode 100644 index 0000000..12b6cd6 --- /dev/null +++ b/spec/metaphone_spec.rb @@ -0,0 +1,36 @@ +# -*- encoding: utf-8 -*- + +require File.dirname(__FILE__) + '/spec_helper' + +describe Russian do + describe "metaphone" do + def m(str) + Russian::metaphone(str) + end + + it "'metaphone' method should perform metaphone generation code" do + str = mock(:str) + Russian::Metaphone.should_receive(:generate).with(str) + Russian.send(:metaphone, str) + end + + it "should generate proper metaphone" do + m("").should == "" + m("Это просто некий текст").should == "Т ПРСТ НК ТКСТ" + m("сочный").should == "ШН" + m("много букв").should == m("мнока букафф") + m("небольшие опечатки и албанский").should == m("нипалшые опчатги олпансгие") + m("ранний рассвет").should == m("раненное росифатой") + m("китайский ресторан").should == m("кытаски ристаран") + m("макдональдс").should == m("магдоналтс") + m("поцанчик").should == m("патсанчег") + m("сейчас").should == m("щаз") + m("эта").should == m("этот") + end + + it "should generate proper metaphone for mixed russian-english strings" do + m("Это кусок строки русских букв v peremeshku s latinizey i 
амперсандом (pozor!) & something").should == + "Т КСК СТРК РСКХ ПКФ F PRMXHK S LTNS I МПРСНТМ (PSR!) & SM0NG" + end + end +end From cb089c494f948b740c074268f0f6886a60608343 Mon Sep 17 00:00:00 2001 From: Lev Rechnik Date: Wed, 28 Mar 2012 01:43:20 +0400 Subject: [PATCH 05/20] Improve russian metaphone code generation --- lib/russian/metaphone.rb | 2 +- spec/metaphone_spec.rb | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/russian/metaphone.rb b/lib/russian/metaphone.rb index d8ed504..ea84f62 100644 --- a/lib/russian/metaphone.rb +++ b/lib/russian/metaphone.rb @@ -52,7 +52,7 @@ def generate_en(aWord) end TRANSFORMATIONS_RU = [ - [ /[дт]с/,'ц' ], # seems this improves matching + [ /[дт]ь?с/,'ц' ], # seems this improves matching [ /[аяоёуюыиэе]/, '' ], # remove vowels [ /[йъь]/, '' ], # these also ; this also removes all adjactive endings (-ый, -ая, -ое, ...) # [ /[лмн]/, 'л' ], # seems this improves matching diff --git a/spec/metaphone_spec.rb b/spec/metaphone_spec.rb index 12b6cd6..9d46005 100644 --- a/spec/metaphone_spec.rb +++ b/spec/metaphone_spec.rb @@ -24,6 +24,7 @@ def m(str) m("китайский ресторан").should == m("кытаски ристаран") m("макдональдс").should == m("магдоналтс") m("поцанчик").should == m("патсанчег") + m("напиться").should == m("напицца") m("сейчас").should == m("щаз") m("эта").should == m("этот") end From e8d18eaf7692e95b6c736ae4169d15971550759b Mon Sep 17 00:00:00 2001 From: Lev Rechnik Date: Wed, 28 Mar 2012 01:59:53 +0400 Subject: [PATCH 06/20] Improve transliteration of pair quotes --- lib/russian/transliteration.rb | 1 + spec/transliteration_spec.rb | 1 + 2 files changed, 2 insertions(+) diff --git a/lib/russian/transliteration.rb b/lib/russian/transliteration.rb index 9008646..f15b20c 100644 --- a/lib/russian/transliteration.rb +++ b/lib/russian/transliteration.rb @@ -20,6 +20,7 @@ module Transliteration "с"=>"s","т"=>"t","у"=>"u","ф"=>"f","х"=>"h", "ц"=>"ts","ч"=>"ch","ш"=>"sh","щ"=>"sch","ъ"=>"'", 
"ы"=>"y","ь"=>"","э"=>"e","ю"=>"yu","я"=>"ya", + "»"=>"\"","«" => "\"","“"=>"\"","”" => "\"", } LOWER_MULTI = { "ье"=>"ie", diff --git a/spec/transliteration_spec.rb b/spec/transliteration_spec.rb index 363f6a9..e888f90 100644 --- a/spec/transliteration_spec.rb +++ b/spec/transliteration_spec.rb @@ -39,6 +39,7 @@ def dt(str) t("Ш").should == "SH" t("ц").should == "ts" t("схема").should == "skhema" + t("“”«»").should == '""""' end it "should de-transliterate properly" do From 54b8291c234bb86a472fd2b212ae9db14c0c2ffb Mon Sep 17 00:00:00 2001 From: Lev Rechnik Date: Wed, 28 Mar 2012 23:25:09 +0400 Subject: [PATCH 07/20] Remove all non alphabetical characters from result string --- lib/russian/metaphone.rb | 7 ++++--- spec/metaphone_spec.rb | 3 ++- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/lib/russian/metaphone.rb b/lib/russian/metaphone.rb index ea84f62..bf68d68 100644 --- a/lib/russian/metaphone.rb +++ b/lib/russian/metaphone.rb @@ -34,11 +34,12 @@ module Metaphone [/q/ , 'k'], [/s(h|(ia)|(io))/ , 'x\1'], [/t((ia)|(io))/ , 'x\1'], - [/th/ , '0'], [/v/ , 'f'], [/w(?![aeiou])/ , ''], [/y(?![aeiou])/ , ''], - [/z/ , 's'] + [/z/ , 's'], +# [/th/ , '0'], # <-- zero ?!? + [/th/ , 'z'], # need only latin letters, no digits or smth else ] # english metaphone code was inspired by @@ -79,7 +80,7 @@ def generate_ru(aWord) def generate(str) str.split(/\s+/).map do |s| s =~ /[A-Za-z]/ ? generate_en(s =~ /[^A-Za-z]/ ? 
Russian.translit(s) : s) : generate_ru(s) - end .join(' ').gsub(/\s+/,' ').mb_chars.upcase + end .join(' ').mb_chars.upcase.gsub(/[^A-ZА-Я]+/, ' ').gsub(/\s+/, ' ').strip end end diff --git a/spec/metaphone_spec.rb b/spec/metaphone_spec.rb index 9d46005..830a698 100644 --- a/spec/metaphone_spec.rb +++ b/spec/metaphone_spec.rb @@ -27,11 +27,12 @@ def m(str) m("напиться").should == m("напицца") m("сейчас").should == m("щаз") m("эта").should == m("этот") + m(" 2эта, \"3В21'").should == "Т Ф" end it "should generate proper metaphone for mixed russian-english strings" do m("Это кусок строки русских букв v peremeshku s latinizey i амперсандом (pozor!) & something").should == - "Т КСК СТРК РСКХ ПКФ F PRMXHK S LTNS I МПРСНТМ (PSR!) & SM0NG" + "Т КСК СТРК РСКХ ПКФ F PRMXHK S LTNS I МПРСНТМ PSR SMZNG" end end end From 93229535d873d222f422e26275806bb40bde469d Mon Sep 17 00:00:00 2001 From: Lev Rechnik Date: Wed, 28 Mar 2012 23:30:42 +0400 Subject: [PATCH 08/20] Improve tests --- spec/metaphone_spec.rb | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/spec/metaphone_spec.rb b/spec/metaphone_spec.rb index 830a698..5cb829c 100644 --- a/spec/metaphone_spec.rb +++ b/spec/metaphone_spec.rb @@ -18,6 +18,8 @@ def m(str) m("").should == "" m("Это просто некий текст").should == "Т ПРСТ НК ТКСТ" m("сочный").should == "ШН" + m(" 2эта, \"3В21'").should == "Т Ф" + m("иван-и-марья").should == "ФН МР" m("много букв").should == m("мнока букафф") m("небольшие опечатки и албанский").should == m("нипалшые опчатги олпансгие") m("ранний рассвет").should == m("раненное росифатой") @@ -27,7 +29,6 @@ def m(str) m("напиться").should == m("напицца") m("сейчас").should == m("щаз") m("эта").should == m("этот") - m(" 2эта, \"3В21'").should == "Т Ф" end it "should generate proper metaphone for mixed russian-english strings" do From de2a4968a025a385a94f16ebcc8a2d58c262e5a3 Mon Sep 17 00:00:00 2001 From: Lev Rechnik Date: Thu, 29 Mar 2012 02:05:48 +0400 Subject: [PATCH 09/20] Fix wrong 
metaphone calculation for 'ih' word --- lib/russian/metaphone.rb | 4 ++-- spec/metaphone_spec.rb | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/russian/metaphone.rb b/lib/russian/metaphone.rb index bf68d68..f0c3a05 100644 --- a/lib/russian/metaphone.rb +++ b/lib/russian/metaphone.rb @@ -30,7 +30,7 @@ module Metaphone [/gn((?![aeiou])|(\z))/ , 'n'], [/g[eiy]/ , 'j'], [/ph/ , 'f'], - [/[aeiou]h(?![aeoiu])/ , '\1'], # 'h' is silent after a vowel unless it's between vowels + [/[aeiou]h(?!(?:[aeoiu]|$))/ , '\1'], # 'h' is silent after a vowel unless it's between vowels [/q/ , 'k'], [/s(h|(ia)|(io))/ , 'x\1'], [/t((ia)|(io))/ , 'x\1'], @@ -49,7 +49,7 @@ def generate_en(aWord) word = aWord.downcase TRANSFORMATIONS_EN.each { |transform| word.gsub!(transform[0], transform[1]) } word.squeeze - return (word[0].chr + word[1..word.length-1].gsub(/[aeiou]/, '')).upcase + return word.present? ? (word[0] + word[1..-1].gsub(/[aeiou]/, '')).upcase : "" end TRANSFORMATIONS_RU = [ diff --git a/spec/metaphone_spec.rb b/spec/metaphone_spec.rb index 5cb829c..f177a17 100644 --- a/spec/metaphone_spec.rb +++ b/spec/metaphone_spec.rb @@ -16,6 +16,7 @@ def m(str) it "should generate proper metaphone" do m("").should == "" + m("ih").should == "IH" m("Это просто некий текст").should == "Т ПРСТ НК ТКСТ" m("сочный").should == "ШН" m(" 2эта, \"3В21'").should == "Т Ф" From 2b70189b1285e6ee763855728732311906d6a9e5 Mon Sep 17 00:00:00 2001 From: Lev Rechnik Date: Thu, 29 Mar 2012 23:05:01 +0400 Subject: [PATCH 10/20] Add support of standard russian/english keyboard layout --- lib/russian.rb | 16 ++++++++++++++++ lib/russian/transliteration.rb | 17 +++++++++++++++++ spec/transliteration_spec.rb | 29 +++++++++++++++++++++++++++++ 3 files changed, 62 insertions(+) diff --git a/lib/russian.rb b/lib/russian.rb index a4fd3f6..d1f3397 100644 --- a/lib/russian.rb +++ b/lib/russian.rb @@ -96,6 +96,22 @@ def metaphone(str) Russian::Metaphone.generate(str) end + # Change the input 
string as it would be typed in the standard russian keyboard layout + # + # Usage: + # Russian.layout_rus("hemby") # рубин + def layout_rus(str) + Russian::Transliteration.layout_rus(str) + end + + # Change the input string as it would be typed in the standard english keyboard layout + # + # Usage: + # Russian.layout_eng("дум") # lev + def layout_eng(str) + Russian::Transliteration.layout_eng(str) + end + protected # Returns all locale files shipped with library def locale_files diff --git a/lib/russian/transliteration.rb b/lib/russian/transliteration.rb index f15b20c..377f45d 100644 --- a/lib/russian/transliteration.rb +++ b/lib/russian/transliteration.rb @@ -135,5 +135,22 @@ def convert(str, upper, lower, multi_pattern) result end + + LAYOUT_RUS_UPPER = 'ЙЦУКЕНГШЩЗХЪФЫВАПРОЛДЖЭЯЧСМИТЬБЮЁ№"' + LAYOUT_ENG_UPPER = 'QWERTYUIOP{}ASDFGHJKL:"ZXCVBNM<>~#@' + + LAYOUT_RUS_LOWER = 'йцукенгшщзхъфывапролджэячсмитьбюё' + LAYOUT_ENG_LOWER = "qwertyuiop[]asdfghjkl;'zxcvbnm,.`" + + LAYOUT_RUS = (LAYOUT_RUS_LOWER + LAYOUT_RUS_UPPER).freeze + LAYOUT_ENG = (LAYOUT_ENG_LOWER + LAYOUT_ENG_UPPER).freeze + + def layout_rus(str) + str.to_s.tr(LAYOUT_ENG, LAYOUT_RUS) + end + + def layout_eng(str) + str.to_s.tr(LAYOUT_RUS, LAYOUT_ENG) + end end end diff --git a/spec/transliteration_spec.rb b/spec/transliteration_spec.rb index e888f90..5144bc7 100644 --- a/spec/transliteration_spec.rb +++ b/spec/transliteration_spec.rb @@ -73,5 +73,34 @@ def dt(str) t("Алябьев").should == "Alyabiev" t("АЛЯБЬЕВ").should == "ALYABIEV" end + + %w(rus eng).each do |lang| + it "'layout_#{lang}' method should change string as it would be typed in '#{lang}' keyboard layout" do + str = mock(:str) + Russian.send("layout_"+lang, str) + end + end + + def lr(str) + Russian.layout_rus(str) + end + + def le(str) + Russian.layout_eng(str) + end + + it "should change input layout to standard russian" do + lr('ntcn').should == "тест" + lr('gJKBNBYAJHVFWBz').should == "пОЛИТИНФОРМАЦИя" + lr('~@#:"M<>').should == 'Ё"№ЖЭЬБЮ' 
+ lr("`;',.").should == 'ёжэбю' + end + + it "should change input layout to standard english" do + le('еуые').should == "test" + le('сщТЕФьШтфешЩт').should == "coNTAmInatiOn" + le('Ё"№ЖЭЬБЮ').should == '~@#:"M<>' + le('ёжэбю').should == "`;',." + end end end From abd4c8c8fe9cfd4c4fc2dbc8cf30bbc12568be83 Mon Sep 17 00:00:00 2001 From: Lev Rechnik Date: Fri, 30 Mar 2012 15:22:09 +0400 Subject: [PATCH 11/20] Improve metaphone similarity matching for russian --- lib/russian/metaphone.rb | 12 +++++------- spec/metaphone_spec.rb | 5 +++-- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/lib/russian/metaphone.rb b/lib/russian/metaphone.rb index f0c3a05..9b45963 100644 --- a/lib/russian/metaphone.rb +++ b/lib/russian/metaphone.rb @@ -54,18 +54,16 @@ def generate_en(aWord) TRANSFORMATIONS_RU = [ [ /[дт]ь?с/,'ц' ], # seems this improves matching - [ /[аяоёуюыиэе]/, '' ], # remove vowels + [ /[аяоёуюыиэеї]/, '' ],# remove vowels [ /[йъь]/, '' ], # these also ; this also removes all adjactive endings (-ый, -ая, -ое, ...) 
-# [ /[лмн]/, 'л' ], # seems this improves matching -# [ /ч/, 'ц' ], # this also - [ /сч/,'ш' ], # this also + [ /сч/,'ш' ], # seems this improves matching +# [ /ч/, 'ц' ], + [ /[ґгк]/, 'х' ], [ /б/, 'п' ], # map pair letters [ /в/, 'ф' ], - [ /г/, 'к' ], [ /д/, 'т' ], - [ /ж/, 'ш' ], + [ /[жщ]/, 'ш' ], [ /з/, 'с' ], - [ /щ/, 'ш' ], ] def generate_ru(aWord) diff --git a/spec/metaphone_spec.rb b/spec/metaphone_spec.rb index f177a17..dd86032 100644 --- a/spec/metaphone_spec.rb +++ b/spec/metaphone_spec.rb @@ -17,7 +17,7 @@ def m(str) it "should generate proper metaphone" do m("").should == "" m("ih").should == "IH" - m("Это просто некий текст").should == "Т ПРСТ НК ТКСТ" + m("Это просто некий текст").should == "Т ПРСТ НХ ТХСТ" m("сочный").should == "ШН" m(" 2эта, \"3В21'").should == "Т Ф" m("иван-и-марья").should == "ФН МР" @@ -25,6 +25,7 @@ def m(str) m("небольшие опечатки и албанский").should == m("нипалшые опчатги олпансгие") m("ранний рассвет").should == m("раненное росифатой") m("китайский ресторан").should == m("кытаски ристаран") + m("парикмахерская").should == m("порехмакерская") m("макдональдс").should == m("магдоналтс") m("поцанчик").should == m("патсанчег") m("напиться").should == m("напицца") @@ -34,7 +35,7 @@ def m(str) it "should generate proper metaphone for mixed russian-english strings" do m("Это кусок строки русских букв v peremeshku s latinizey i амперсандом (pozor!) 
& something").should == - "Т КСК СТРК РСКХ ПКФ F PRMXHK S LTNS I МПРСНТМ PSR SMZNG" + "Т ХСХ СТРХ РСХ ПХФ F PRMXHK S LTNS I МПРСНТМ PSR SMZNG" end end end From dec43ce598449e2fbb89ce096c626d2312d2e117 Mon Sep 17 00:00:00 2001 From: Lev Rechnik Date: Fri, 30 Mar 2012 22:37:04 +0400 Subject: [PATCH 12/20] =?UTF-8?q?Improve=20metaphone=20for=20'pizza'=20and?= =?UTF-8?q?=20'=D0=BF=D0=B8=D1=86=D1=86=D0=B0'?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- lib/russian/metaphone.rb | 3 ++- spec/metaphone_spec.rb | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/lib/russian/metaphone.rb b/lib/russian/metaphone.rb index 9b45963..bdf03ef 100644 --- a/lib/russian/metaphone.rb +++ b/lib/russian/metaphone.rb @@ -54,6 +54,7 @@ def generate_en(aWord) TRANSFORMATIONS_RU = [ [ /[дт]ь?с/,'ц' ], # seems this improves matching + [ /зз/, 'цц' ], # for metaphone(detranslit(pizza)) == metaphone(пицца) [ /[аяоёуюыиэеї]/, '' ],# remove vowels [ /[йъь]/, '' ], # these also ; this also removes all adjactive endings (-ый, -ая, -ое, ...) 
[ /сч/,'ш' ], # seems this improves matching @@ -67,7 +68,7 @@ def generate_en(aWord) ] def generate_ru(aWord) - word = aWord.mb_chars.downcase.squeeze + word = aWord.mb_chars.downcase TRANSFORMATIONS_RU.each { |transform| word.gsub!(transform[0], transform[1]) } word.squeeze end diff --git a/spec/metaphone_spec.rb b/spec/metaphone_spec.rb index dd86032..a96eb7b 100644 --- a/spec/metaphone_spec.rb +++ b/spec/metaphone_spec.rb @@ -23,8 +23,9 @@ def m(str) m("иван-и-марья").should == "ФН МР" m("много букв").should == m("мнока букафф") m("небольшие опечатки и албанский").should == m("нипалшые опчатги олпансгие") - m("ранний рассвет").should == m("раненное росифатой") m("китайский ресторан").should == m("кытаски ристаран") + m("ранний рассвет").should == m("раненное росифатой") + m(Russian.detranslit("pizza")).should == m("пицца") m("парикмахерская").should == m("порехмакерская") m("макдональдс").should == m("магдоналтс") m("поцанчик").should == m("патсанчег") From 5a7b532806f99e1325ac9ec5cbcd21b805832a05 Mon Sep 17 00:00:00 2001 From: Lev Rechnik Date: Thu, 5 Apr 2012 12:24:24 +0400 Subject: [PATCH 13/20] =?UTF-8?q?Add=20detransliteration=20rule=20'ph'->'?= =?UTF-8?q?=D1=84'?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- lib/russian/transliteration.rb | 1 + spec/transliteration_spec.rb | 1 + 2 files changed, 2 insertions(+) diff --git a/lib/russian/transliteration.rb b/lib/russian/transliteration.rb index 377f45d..b16af03 100644 --- a/lib/russian/transliteration.rb +++ b/lib/russian/transliteration.rb @@ -65,6 +65,7 @@ module Transliteration "ay"=>"ай","yay"=>"яй","oy"=>"ой","yoy"=>"ёй","uy"=>"уй","yuy"=>"юй","yy"=>"ый","iy"=>"ий","ey"=>"ей", "ay"=>"ай","yay"=>"яй","oy"=>"ой","yoy"=>"ёй","uy"=>"уй","yuy"=>"юй","yy"=>"ый","iy"=>"ий","ey"=>"ей", "ch"=>"ч","zh"=>"ж","sh"=>"ш","ts"=>"ц", + "ph"=>"ф", } REVERSE_UPPER_SINGLE = { diff --git a/spec/transliteration_spec.rb b/spec/transliteration_spec.rb index 5144bc7..9d4b12f 
100644 --- a/spec/transliteration_spec.rb +++ b/spec/transliteration_spec.rb @@ -52,6 +52,7 @@ def dt(str) dt("sh").should == "ш" dt("TS").should == "Ц" dt("skhema").should == "схема" + dt("philosophy").should == "философы" end it "should properly transliterate mixed russian-english strings" do From 6a02fbc20b62f23f729c7c5bc2910f40e3958bbb Mon Sep 17 00:00:00 2001 From: lev Date: Sat, 26 Jan 2013 19:24:16 +0400 Subject: [PATCH 14/20] =?UTF-8?q?=D0=94=D0=BE=D0=B1=D0=B0=D0=B2=D0=B8?= =?UTF-8?q?=D0=BB=20=D0=B2=D0=BE=D0=B7=D0=BC=D0=BE=D0=B6=D0=BD=D0=BE=D1=81?= =?UTF-8?q?=D1=82=D1=8C=20=D0=BA=D0=BE=D0=BD=D0=B2=D0=B5=D1=80=D1=82=D0=B8?= =?UTF-8?q?=D1=80=D0=BE=D0=B2=D0=B0=D1=82=D1=8C=20=D0=BC=D0=B0=D1=81=D1=81?= =?UTF-8?q?=D0=B8=D0=B2=20=D1=81=D1=82=D1=80=D0=BE=D0=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- lib/russian/transliteration.rb | 10 +++++++++- spec/transliteration_spec.rb | 5 +++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/lib/russian/transliteration.rb b/lib/russian/transliteration.rb index b16af03..72d1f38 100644 --- a/lib/russian/transliteration.rb +++ b/lib/russian/transliteration.rb @@ -115,7 +115,15 @@ def detransliterate(str) convert(str, REVERSE_UPPER, REVERSE_LOWER, REVERSE_MULTI_KEYS_PATTERN) end - def convert(str, upper, lower, multi_pattern) + def convert(src, *params) + send( 'convert_' + src.class.name.downcase, src, *params) + end + + def convert_array(arr, *params) + arr.map { |a| convert(a, *params) } + end + + def convert_string(str, upper, lower, multi_pattern) chars = str.scan(%r{#{multi_pattern}|\w|.}) result = "" diff --git a/spec/transliteration_spec.rb b/spec/transliteration_spec.rb index 9d4b12f..60afa46 100644 --- a/spec/transliteration_spec.rb +++ b/spec/transliteration_spec.rb @@ -75,6 +75,11 @@ def dt(str) t("АЛЯБЬЕВ").should == "ALYABIEV" end + it "should process arrays of strings" do + arr = %w(раз два три) + t(arr).should == arr.map { |a| t(a) } + end + 
%w(rus eng).each do |lang| it "'layout_#{lang}' method should change string as it would be typed in '#{lang}' keyboard layout" do str = mock(:str) From fe7eb1597748b86ba71b6f215122e5caf8c6410d Mon Sep 17 00:00:00 2001 From: lightalloy Date: Sun, 13 May 2012 15:40:15 +0400 Subject: [PATCH 15/20] fix for date localization (full month names with %-d) --- lib/russian.rb | 2 +- spec/i18n/locale/datetime_spec.rb | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/russian.rb b/lib/russian.rb index d1f3397..b03a2cd 100644 --- a/lib/russian.rb +++ b/lib/russian.rb @@ -23,7 +23,7 @@ def locale # Regexp machers for context-based russian month names and day names translation LOCALIZE_ABBR_MONTH_NAMES_MATCH = /(%d|%e)(.*)(%b)/ - LOCALIZE_MONTH_NAMES_MATCH = /(%d|%e)(.*)(%B)/ + LOCALIZE_MONTH_NAMES_MATCH = /(%-?d|%e)(.*)(%B)/ LOCALIZE_STANDALONE_ABBR_DAY_NAMES_MATCH = /^%a/ LOCALIZE_STANDALONE_DAY_NAMES_MATCH = /^%A/ diff --git a/spec/i18n/locale/datetime_spec.rb b/spec/i18n/locale/datetime_spec.rb index 5cc4e45..7b97db3 100644 --- a/spec/i18n/locale/datetime_spec.rb +++ b/spec/i18n/locale/datetime_spec.rb @@ -43,6 +43,7 @@ describe "with month names" do it "should use month names" do l(@date, :format => "%d %B").should == "01 декабря" + l(@date, :format => "%-d %B").should == "1 декабря" l(@date, :format => "%e %B %Y").should == " 1 декабря 1985" l(@date, :format => "%d %B").should == "01 декабря" l(@date, :format => "%e %B %Y").should == " 1 декабря 1985" From c6d71c887980e0b60dfc49f5e44c364899f475d8 Mon Sep 17 00:00:00 2001 From: aai10 Date: Thu, 17 May 2012 15:18:56 +0400 Subject: [PATCH 16/20] =?UTF-8?q?=D0=98=D1=81=D0=BF=D1=80=D0=B0=D0=B2?= =?UTF-8?q?=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BD=D0=B5=D0=BA=D0=BE=D1=80?= =?UTF-8?q?=D1=80=D0=B5=D0=BA=D1=82=D0=BD=D0=BE=D0=B3=D0=BE=20=D0=BE=D1=82?= =?UTF-8?q?=D0=BE=D0=B1=D1=80=D0=B0=D0=B6=D0=B5=D0=BD=D0=B8=D1=8F=20=D0=BD?= =?UTF-8?q?=D0=B0=D0=B7=D0=B2=D0=B0=D0=BD=D0=B8=D1=8F=20=D0=BC=D0=B5=D1=81?= 
=?UTF-8?q?=D1=8F=D1=86=D0=B0=20=D0=B2=20=D1=81=D0=BB=D1=83=D1=87=D0=B0?= =?UTF-8?q?=D0=B5=20=D0=B8=D1=81=D0=BF=D0=BE=D0=BB=D1=8C=D0=B7=D0=BE=D0=B2?= =?UTF-8?q?=D0=B0=D0=BD=D0=B8=D1=8F=20=D0=BA=D0=BB=D1=8E=D1=87=D0=B0=20%-d?= =?UTF-8?q?=20=D0=B8=D0=BB=D0=B8=20%1d:=20Russian::strftime(Time.now-10000?= =?UTF-8?q?00,"%d=20%b=20%Y")=20=3D>=20"06=20=D0=BC=D0=B0=D1=8F=202012"=20?= =?UTF-8?q?Russian::strftime(Time.now-1000000,"%-d=20%b=20%Y")=20=3D>=20"6?= =?UTF-8?q?=20=D0=BC=D0=B0=D0=B9=202012"=20Russian::strftime(Time.now-1000?= =?UTF-8?q?000,"%1d=20%b=20%Y")=20=3D>=20"6=20=D0=BC=D0=B0=D0=B9=202012"?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- lib/russian.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/russian.rb b/lib/russian.rb index b03a2cd..7b1631b 100644 --- a/lib/russian.rb +++ b/lib/russian.rb @@ -22,8 +22,8 @@ def locale end # Regexp machers for context-based russian month names and day names translation - LOCALIZE_ABBR_MONTH_NAMES_MATCH = /(%d|%e)(.*)(%b)/ - LOCALIZE_MONTH_NAMES_MATCH = /(%-?d|%e)(.*)(%B)/ + LOCALIZE_ABBR_MONTH_NAMES_MATCH = /(%[-\d]?d|%e)(.*)(%b)/ + LOCALIZE_MONTH_NAMES_MATCH = /(%[-\d]?d|%e)(.*)(%B)/ LOCALIZE_STANDALONE_ABBR_DAY_NAMES_MATCH = /^%a/ LOCALIZE_STANDALONE_DAY_NAMES_MATCH = /^%A/ From 9ee3342734609d8ef511f343c1c7c7346499cd3a Mon Sep 17 00:00:00 2001 From: Yaroslav Markin Date: Thu, 17 May 2012 21:43:22 +0400 Subject: [PATCH 17/20] %1d %B specs --- spec/i18n/locale/datetime_spec.rb | 2 ++ 1 file changed, 2 insertions(+) diff --git a/spec/i18n/locale/datetime_spec.rb b/spec/i18n/locale/datetime_spec.rb index 7b97db3..bfa76f4 100644 --- a/spec/i18n/locale/datetime_spec.rb +++ b/spec/i18n/locale/datetime_spec.rb @@ -44,6 +44,8 @@ it "should use month names" do l(@date, :format => "%d %B").should == "01 декабря" l(@date, :format => "%-d %B").should == "1 декабря" + l(@date, :format => "%1d %B").should == "1 декабря" + l(@date, :format => "%2d %B").should == 
"01 декабря" l(@date, :format => "%e %B %Y").should == " 1 декабря 1985" l(@date, :format => "%d %B").should == "01 декабря" l(@date, :format => "%e %B %Y").should == " 1 декабря 1985" From 59d67814a760d654d3ee5751b0924b64e1747fbe Mon Sep 17 00:00:00 2001 From: Yaroslav Markin Date: Thu, 17 May 2012 21:57:49 +0400 Subject: [PATCH 18/20] Proper 1.9.3 strftime specs --- spec/i18n/locale/datetime_spec.rb | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/spec/i18n/locale/datetime_spec.rb b/spec/i18n/locale/datetime_spec.rb index bfa76f4..fe65453 100644 --- a/spec/i18n/locale/datetime_spec.rb +++ b/spec/i18n/locale/datetime_spec.rb @@ -44,8 +44,12 @@ it "should use month names" do l(@date, :format => "%d %B").should == "01 декабря" l(@date, :format => "%-d %B").should == "1 декабря" - l(@date, :format => "%1d %B").should == "1 декабря" - l(@date, :format => "%2d %B").should == "01 декабря" + + if RUBY_VERSION > "1.9.2" + l(@date, :format => "%1d %B").should == "1 декабря" + l(@date, :format => "%2d %B").should == "01 декабря" + end + l(@date, :format => "%e %B %Y").should == " 1 декабря 1985" l(@date, :format => "%d %B").should == "01 декабря" l(@date, :format => "%e %B %Y").should == " 1 декабря 1985" From 3902efea2553298678f4831f48906103dcaca6c6 Mon Sep 17 00:00:00 2001 From: "Andrey A.I. Sitnik" Date: Thu, 11 Apr 2013 23:53:43 +0300 Subject: [PATCH 19/20] Use correct number separator --- lib/russian/locale/actionview.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/russian/locale/actionview.yml b/lib/russian/locale/actionview.yml index 20d5ff1..4207a47 100644 --- a/lib/russian/locale/actionview.yml +++ b/lib/russian/locale/actionview.yml @@ -7,7 +7,7 @@ ru: # These are also the defaults for 'currency', 'percentage', 'precision', and 'human' format: # Sets the separator between the units, for more precision (e.g. 1.0 / 2.0 == 0.5) - separator: "." + separator: "," # Delimets thousands (e.g. 
1,000,000 is a million) (always in groups of three) delimiter: " " # Number of decimals, behind the separator (the number 1 with a precision of 2 gives: 1.00) From 156f9bee23b699754235444b6915fca8df555595 Mon Sep 17 00:00:00 2001 From: lev Date: Tue, 13 May 2014 19:39:35 +0400 Subject: [PATCH 20/20] Fix message for "confirmation" --- lib/russian/locale/activemodel.yml | 2 +- lib/russian/locale/activerecord.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/russian/locale/activemodel.yml b/lib/russian/locale/activemodel.yml index 5066327..cda2797 100644 --- a/lib/russian/locale/activemodel.yml +++ b/lib/russian/locale/activemodel.yml @@ -6,7 +6,7 @@ ru: inclusion: "имеет непредусмотренное значение" exclusion: "имеет зарезервированное значение" invalid: "имеет неверное значение" - confirmation: "не совпадает с подтверждением" + confirmation: "не совпадает с подтверждаемым значением" accepted: "нужно подтвердить" empty: "не может быть пустым" blank: "не может быть пустым" diff --git a/lib/russian/locale/activerecord.yml b/lib/russian/locale/activerecord.yml index 8b67531..4747f9d 100644 --- a/lib/russian/locale/activerecord.yml +++ b/lib/russian/locale/activerecord.yml @@ -19,7 +19,7 @@ ru: inclusion: "имеет непредусмотренное значение" exclusion: "имеет зарезервированное значение" invalid: "имеет неверное значение" - confirmation: "не совпадает с подтверждением" + confirmation: "не совпадает с подтверждаемым значением" accepted: "нужно подтвердить" empty: "не может быть пустым" blank: "не может быть пустым"