From db792cdd82eb782e98d934995964809d9e8cb77d Mon Sep 17 00:00:00 2001 From: Akinori MUSHA Date: Fri, 25 Jul 2014 17:32:53 +0900 Subject: [PATCH] Set charset/collation properly for each text column if using MySQL. With this change, Huginn is able to store up to 4-byte UTF-8 characters in its database. This should fix #286. --- .../initializers/ar_mysql_column_charset.rb | 3 + .../20140813110107_set_charset_for_mysql.rb | 74 +++++++++++++++++ db/schema.rb | 68 ++++++++-------- lib/ar_mysql_column_charset.rb | 80 +++++++++++++++++++ 4 files changed, 191 insertions(+), 34 deletions(-) create mode 100644 config/initializers/ar_mysql_column_charset.rb create mode 100644 db/migrate/20140813110107_set_charset_for_mysql.rb create mode 100644 lib/ar_mysql_column_charset.rb diff --git a/config/initializers/ar_mysql_column_charset.rb b/config/initializers/ar_mysql_column_charset.rb new file mode 100644 index 00000000..cade648e --- /dev/null +++ b/config/initializers/ar_mysql_column_charset.rb @@ -0,0 +1,3 @@ +ActiveSupport.on_load :active_record do + require 'ar_mysql_column_charset' +end diff --git a/db/migrate/20140813110107_set_charset_for_mysql.rb b/db/migrate/20140813110107_set_charset_for_mysql.rb new file mode 100644 index 00000000..2fc9e5ee --- /dev/null +++ b/db/migrate/20140813110107_set_charset_for_mysql.rb @@ -0,0 +1,74 @@ +class SetCharsetForMysql < ActiveRecord::Migration + def all_models + @all_models ||= [ + Agent, + AgentLog, + Contact, + Event, + Link, + Scenario, + ScenarioMembership, + User, + UserCredential, + Delayed::Job, + ] + end + + def change + conn = ActiveRecord::Base.connection + + # This migration is for MySQL only. + return unless conn.is_a?(ActiveRecord::ConnectionAdapters::AbstractMysqlAdapter) + + reversible do |dir| + dir.up do + all_models.each { |model| + table_name = model.table_name + + # `contacts` may not exist + next unless connection.table_exists? 
table_name + + model.columns.each { |column| + name = column.name + type = column.type + limit = column.limit + options = { + limit: limit, + null: column.null, + default: column.default, + } + + case type + when :string, :text + options.update(charset: 'utf8', collation: 'utf8_general_ci') + case name + when 'username' + options.update(limit: 767 / 4, charset: 'utf8mb4', collation: 'utf8mb4_general_ci') + when 'message', 'options', 'name', 'memory', + 'handler', 'last_error', 'payload', 'description' + options.update(charset: 'utf8mb4', collation: 'utf8mb4_bin') + when 'type', 'schedule', 'mode', 'email', + 'invitation_code', 'reset_password_token' + options.update(collation: 'utf8_bin') + when 'guid', 'encrypted_password' + options.update(charset: 'ascii', collation: 'ascii_bin') + end + else + next + end + + change_column table_name, name, type, options + } + + execute 'ALTER TABLE %s CHARACTER SET utf8 COLLATE utf8_general_ci' % table_name + } + + execute 'ALTER DATABASE %s CHARACTER SET utf8 COLLATE utf8_general_ci' % conn.current_database + end + + dir.down do + # Do nothing; there is no use in going back + end + end + end +end diff --git a/db/schema.rb b/db/schema.rb index fbfe18d1..03d1b8ab 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -11,14 +11,14 @@ # # It's strongly recommended that you check this file into your version control system. 
-ActiveRecord::Schema.define(version: 20140723110551) do +ActiveRecord::Schema.define(version: 20140813110107) do # These are extensions that must be enabled in order to support this database enable_extension "plpgsql" create_table "agent_logs", force: true do |t| t.integer "agent_id", null: false - t.text "message", null: false + t.text "message", limit: 16777215, null: false, charset: "utf8mb4", collation: "utf8mb4_bin" t.integer "level", default: 3, null: false t.integer "inbound_event_id" t.integer "outbound_event_id" @@ -28,17 +28,17 @@ ActiveRecord::Schema.define(version: 20140723110551) do create_table "agents", force: true do |t| t.integer "user_id" - t.text "options" - t.string "type" - t.string "name" - t.string "schedule" + t.text "options", limit: 16777215, charset: "utf8mb4", collation: "utf8mb4_bin" + t.string "type", collation: "utf8_bin" + t.string "name", charset: "utf8mb4", collation: "utf8mb4_bin" + t.string "schedule", collation: "utf8_bin" t.integer "events_count" t.datetime "last_check_at" t.datetime "last_receive_at" t.integer "last_checked_event_id" - t.datetime "created_at", null: false - t.datetime "updated_at", null: false - t.text "memory" + t.datetime "created_at", null: false + t.datetime "updated_at", null: false + t.text "memory", limit: 2147483647, charset: "utf8mb4", collation: "utf8mb4_bin" t.datetime "last_web_request_at" t.integer "keep_events_for", default: 0, null: false t.datetime "last_event_at" @@ -46,7 +46,7 @@ ActiveRecord::Schema.define(version: 20140723110551) do t.boolean "propagate_immediately", default: false, null: false t.boolean "disabled", default: false, null: false t.integer "service_id" - t.string "guid", null: false + t.string "guid", null: false, charset: "ascii", collation: "ascii_bin" end add_index "agents", ["guid"], name: "index_agents_on_guid", using: :btree @@ -55,10 +55,10 @@ ActiveRecord::Schema.define(version: 20140723110551) do add_index "agents", ["user_id", "created_at"], name: 
"index_agents_on_user_id_and_created_at", using: :btree create_table "delayed_jobs", force: true do |t| - t.integer "priority", default: 0 - t.integer "attempts", default: 0 - t.text "handler" - t.text "last_error" + t.integer "priority", default: 0 + t.integer "attempts", default: 0 + t.text "handler", limit: 16777215, charset: "utf8mb4", collation: "utf8mb4_bin" + t.text "last_error", limit: 16777215, charset: "utf8mb4", collation: "utf8mb4_bin" t.datetime "run_at" t.datetime "locked_at" t.datetime "failed_at" @@ -73,11 +73,11 @@ ActiveRecord::Schema.define(version: 20140723110551) do create_table "events", force: true do |t| t.integer "user_id" t.integer "agent_id" - t.decimal "lat", precision: 15, scale: 10 - t.decimal "lng", precision: 15, scale: 10 - t.text "payload" - t.datetime "created_at", null: false - t.datetime "updated_at", null: false + t.decimal "lat", precision: 15, scale: 10 + t.decimal "lng", precision: 15, scale: 10 + t.text "payload", limit: 2147483647, charset: "utf8mb4", collation: "utf8mb4_bin" + t.datetime "created_at", null: false + t.datetime "updated_at", null: false t.datetime "expires_at" end @@ -107,13 +107,13 @@ ActiveRecord::Schema.define(version: 20140723110551) do add_index "scenario_memberships", ["scenario_id"], name: "index_scenario_memberships_on_scenario_id", using: :btree create_table "scenarios", force: true do |t| - t.string "name", null: false + t.string "name", null: false, charset: "utf8mb4", collation: "utf8mb4_bin" t.integer "user_id", null: false t.datetime "created_at" t.datetime "updated_at" - t.text "description" + t.text "description", charset: "utf8mb4", collation: "utf8mb4_bin" t.boolean "public", default: false, null: false - t.string "guid", null: false + t.string "guid", null: false, charset: "ascii", collation: "ascii_bin" t.string "source_url" end @@ -142,31 +142,31 @@ ActiveRecord::Schema.define(version: 20140723110551) do t.text "credential_value", null: false t.datetime "created_at", null: false 
t.datetime "updated_at", null: false - t.string "mode", default: "text", null: false + t.string "mode", default: "text", null: false, collation: "utf8_bin" end add_index "user_credentials", ["user_id", "credential_name"], name: "index_user_credentials_on_user_id_and_credential_name", unique: true, using: :btree create_table "users", force: true do |t| - t.string "email", default: "", null: false - t.string "encrypted_password", default: "", null: false - t.string "reset_password_token" + t.string "email", default: "", null: false, collation: "utf8_bin" + t.string "encrypted_password", default: "", null: false, charset: "ascii", collation: "ascii_bin" + t.string "reset_password_token", collation: "utf8_bin" t.datetime "reset_password_sent_at" t.datetime "remember_created_at" - t.integer "sign_in_count", default: 0 + t.integer "sign_in_count", default: 0 t.datetime "current_sign_in_at" t.datetime "last_sign_in_at" t.string "current_sign_in_ip" t.string "last_sign_in_ip" - t.datetime "created_at", null: false - t.datetime "updated_at", null: false - t.boolean "admin", default: false, null: false - t.integer "failed_attempts", default: 0 + t.datetime "created_at", null: false + t.datetime "updated_at", null: false + t.boolean "admin", default: false, null: false + t.integer "failed_attempts", default: 0 t.string "unlock_token" t.datetime "locked_at" - t.string "username", null: false - t.string "invitation_code", null: false - t.integer "scenario_count", default: 0, null: false + t.string "username", limit: 191, null: false, charset: "utf8mb4", collation: "utf8mb4_general_ci" + t.string "invitation_code", null: false, collation: "utf8_bin" + t.integer "scenario_count", default: 0, null: false end add_index "users", ["email"], name: "index_users_on_email", unique: true, using: :btree diff --git a/lib/ar_mysql_column_charset.rb b/lib/ar_mysql_column_charset.rb new file mode 100644 index 00000000..18710918 --- /dev/null +++ b/lib/ar_mysql_column_charset.rb @@ -0,0 +1,80 
@@ +require 'active_record' + +module ActiveRecord::ConnectionAdapters + class ColumnDefinition + module CharsetSupport + attr_accessor :charset, :collation + end + + prepend CharsetSupport + end + + class TableDefinition + module CharsetSupport + def new_column_definition(name, type, options) + column = super + column.charset = options[:charset] + column.collation = options[:collation] + column + end + end + + prepend CharsetSupport + end + + class AbstractMysqlAdapter + module CharsetSupport + def prepare_column_options(column, types) + spec = super + conn = ActiveRecord::Base.connection + spec[:charset] = column.charset.inspect if column.charset && column.charset != conn.charset + spec[:collation] = column.collation.inspect if column.collation && column.collation != conn.collation + spec + end + + def migration_keys + super + [:charset, :collation] + end + end + + prepend CharsetSupport + + class SchemaCreation + module CharsetSupport + def column_options(o) + column_options = super + column_options[:charset] = o.charset unless o.charset.nil? + column_options[:collation] = o.collation unless o.collation.nil? + column_options + end + + def add_column_options!(sql, options) + if options[:charset] + sql << " CHARACTER SET #{options[:charset]}" + end + + if options[:collation] + sql << " COLLATE #{options[:collation]}" + end + + super + end + end + + prepend CharsetSupport + end + + class Column + module CharsetSupport + attr_reader :charset + + def initialize(*args) + super + @charset = @collation[/\A[^_]+/] unless @collation.nil? + end + end + + prepend CharsetSupport + end + end +end if Module.method_defined?(:prepend) || Module.private_method_defined?(:prepend) # ruby >=2.0; Module#prepend was private before 2.1, which method_defined? alone does not report