Archive for the ‘Ruby’ Category

Quick (and dirty) Patch for Ruby Enterprise Edition 2011.03 to Prevent Hash Collision Attacks

Декабрь 29th, 2011

Since there is no official patch for Ruby Enterprise Edition 2011.03 to prevent the Hash Collision DoS Attack, I’ve quickly ported the patch from Ruby 1.8.7 patchlevel 357. Here it is:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
From e19bd3eaa8bd71cfc9e5bf436527f015b093f31e Mon Sep 17 00:00:00 2001
From: shyouhei <shyouhei@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>
Date: Wed, 28 Dec 2011 12:47:15 +0000
Subject: [PATCH] -This line, and those below, will be ignored--

M    ruby_1_8_7/inits.c
M    ruby_1_8_7/string.c
M    ruby_1_8_7/st.c
M    ruby_1_8_7/test/ruby/test_string.rb
M    ruby_1_8_7/random.c


git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/branches/ruby_1_8_7@34151 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
---
 ChangeLog                |   26 ++++++++++++++++
 inits.c                  |    4 ++
 random.c                 |   74 +++++++++++++++++++++++++++++++++++----------
 st.c                     |   14 ++++++++-
 string.c                 |    7 ++++-
 test/ruby/test_string.rb |   13 ++++++++
 version.h                |    8 ++--
 7 files changed, 123 insertions(+), 23 deletions(-)

diff --git a/inits.c b/inits.c
index 947bbbe..a0e061f 100644
--- a/inits.c
+++ b/inits.c
@@ -38,6 +38,7 @@
 void Init_sym _((void));
 void Init_process _((void));
 void Init_Random _((void));
+void Init_RandomSeed _((void));
 void Init_Range _((void));
 void Init_Regexp _((void));
 void Init_signal _((void));
@@ -46,10 +47,13 @@
 void Init_Time _((void));
 void Init_var_tables _((void));
 void Init_version _((void));
+void Init_st _((void));
 
 void
 rb_call_inits()
 {
+    Init_RandomSeed();
+    Init_st();
     Init_sym();
     Init_var_tables();
     Init_Object();
diff --git a/random.c b/random.c
index c0560e3..24a0787 100644
--- a/random.c
+++ b/random.c
@@ -189,6 +189,7 @@
 #include <fcntl.h>
 #endif
 
+static int seed_initialized = 0;
 static VALUE saved_seed = INT2FIX(0);
 
 static VALUE
@@ -250,27 +251,22 @@
     return old;
 }
 
-static VALUE
-random_seed()
+#define DEFAULT_SEED_LEN (4 * sizeof(long))
+
+static void
+fill_random_seed(ptr)
+    char *ptr;
 {
     static int n = 0;
+    unsigned long *seed;
     struct timeval tv;
     int fd;
     struct stat statbuf;
+    char *buf = (char*)ptr;
 
-    int seed_len;
-    BDIGIT *digits;
-    unsigned long *seed;
-    NEWOBJ(big, struct RBignum);
-    OBJSETUP(big, rb_cBignum, T_BIGNUM);
-
-    seed_len = 4 * sizeof(long);
-    big->sign = 1;
-    big->len = seed_len / SIZEOF_BDIGITS + 1;
-    digits = big->digits = ALLOC_N(BDIGIT, big->len);
-    seed = (unsigned long *)big->digits;
+    seed = (unsigned long *)buf;
 
-    memset(digits, 0, big->len * SIZEOF_BDIGITS);
+    memset(buf, 0, DEFAULT_SEED_LEN);
 
 #ifdef S_ISCHR
     if ((fd = open("/dev/urandom", O_RDONLY
@@ -285,7 +281,7 @@
 #endif
             )) >= 0) {
         if (fstat(fd, &statbuf) == 0 && S_ISCHR(statbuf.st_mode)) {
-            read(fd, seed, seed_len);
+            read(fd, seed, DEFAULT_SEED_LEN);
         }
         close(fd);
     }
@@ -296,13 +292,37 @@
     seed[1] ^= tv.tv_sec;
     seed[2] ^= getpid() ^ (n++ << 16);
     seed[3] ^= (unsigned long)&seed;
+}
+
+static VALUE
+make_seed_value(char *ptr)
+{
+    BDIGIT *digits;
+    NEWOBJ(big, struct RBignum);
+    OBJSETUP(big, rb_cBignum, T_BIGNUM);
+
+    RBIGNUM_SET_SIGN(big, 1);
+
+    digits = ALLOC_N(char, DEFAULT_SEED_LEN);
+    RBIGNUM(big)->digits = digits;
+    RBIGNUM(big)->len = DEFAULT_SEED_LEN / SIZEOF_BDIGITS;
+
+    MEMCPY(digits, ptr, char, DEFAULT_SEED_LEN);
 
     /* set leading-zero-guard if need. */
-    digits[big->len-1] = digits[big->len-2] <= 1 ? 1 : 0;
+    digits[RBIGNUM_LEN(big)-1] = digits[RBIGNUM_LEN(big)-2] <= 1 ? 1 : 0;
 
     return rb_big_norm((VALUE)big);
 }
 
+static VALUE
+random_seed(void)
+{
+    char buf[DEFAULT_SEED_LEN];
+    fill_random_seed(buf);
+    return make_seed_value(buf);
+}
+
 /*
  *  call-seq:
  *     srand(number=0)    => old_seed
@@ -443,6 +463,9 @@
     long val, max;
 
     rb_scan_args(argc, argv, "01", &vmax);
+    if (!seed_initialized) {
+       rand_init(random_seed());
+    }
     switch (TYPE(vmax)) {
       case T_FLOAT:
    if (RFLOAT(vmax)->value <= LONG_MAX && RFLOAT(vmax)->value >= LONG_MIN) {
@@ -490,6 +513,8 @@
     return LONG2NUM(val);
 }
 
+static char initial_seed[DEFAULT_SEED_LEN];
+
 void
 rb_reset_random_seed()
 {
@@ -497,9 +522,24 @@
 }
 
 void
+Init_RandomSeed(void)
+{
+    fill_random_seed(initial_seed);
+    init_by_array((unsigned long*)initial_seed, DEFAULT_SEED_LEN/sizeof(unsigned long));
+    seed_initialized = 1;
+}
+
+static void
+Init_RandomSeed2(void)
+{
+    saved_seed = make_seed_value(initial_seed);
+    memset(initial_seed, 0, DEFAULT_SEED_LEN);
+}
+
+void
 Init_Random()
 {
-    rand_init(random_seed());
+    Init_RandomSeed2();
     rb_define_global_function("srand", rb_f_srand, -1);
     rb_define_global_function("rand", rb_f_rand, -1);
     rb_global_variable(&saved_seed);
diff --git a/st.c b/st.c
index c16c310..21e157a 100644
--- a/st.c
+++ b/st.c
@@ -9,6 +9,7 @@
 #include <stdlib.h>
 #endif
 #include <string.h>
+#include <limits.h>
 #include "st.h"
 
 typedef struct st_table_entry st_table_entry;
@@ -521,6 +522,8 @@ struct st_table_entry {
     return 0;
 }
 
+static unsigned long hash_seed = 0;
+
 static int
 strhash(string)
     register const char *string;
@@ -550,10 +553,11 @@ struct st_table_entry {
 
     return val + (val << 15);
 #else
-    register int val = 0;
+    register unsigned long val = hash_seed;
 
     while ((c = *string++) != '\0') {
    val = val*997 + c;
+   val = (val << 13) | (val >> (sizeof(st_data_t) * CHAR_BIT - 13));
     }
 
     return val + (val>>5);
@@ -573,3 +577,11 @@ struct st_table_entry {
 {
     return n;
 }
+
+extern unsigned long rb_genrand_int32(void);
+
+void
+Init_st(void)
+{
+    hash_seed = rb_genrand_int32();
+}
diff --git a/string.c b/string.c
index c6b2301..94a0281 100644
--- a/string.c
+++ b/string.c
@@ -875,13 +875,15 @@
     return str1;
 }
 
+static unsigned long hash_seed;
+
 int
 rb_str_hash(str)
     VALUE str;
 {
     register long len = RSTRING(str)->len;
     register char *p = RSTRING(str)->ptr;
-    register int key = 0;
+    register unsigned long key = hash_seed;
 
 #if defined(HASH_ELFHASH)
     register unsigned int g;
@@ -905,6 +907,7 @@
     while (len--) {
    key = key*65599 + *p;
    p++;
+   key = (key << 13) | (key >> ((sizeof(unsigned long) * CHAR_BIT) - 13));
     }
     key = key + (key>>5);
 #endif
@@ -5062,4 +5065,6 @@ struct tr {
     rb_fs = Qnil;
     rb_define_variable("$;", &rb_fs);
     rb_define_variable("$-F", &rb_fs);
+
+    hash_seed = rb_genrand_int32();
 }
diff --git a/test/ruby/test_string.rb b/test/ruby/test_string.rb
index 5f2c54f..4d97182 100644
--- a/test/ruby/test_string.rb
+++ b/test/ruby/test_string.rb
@@ -1,4 +1,5 @@
 require 'test/unit'
+require File.expand_path('envutil', File.dirname(__FILE__))
 
 class TestString < Test::Unit::TestCase
   def check_sum(str, bits=16)
@@ -29,4 +30,16 @@ def test_inspect
   ensure
     $KCODE = original_kcode
   end
+
+  def test_hash_random
+    str = 'abc'
+    a = [str.hash.to_s]
+    cmd = sprintf("%s -e 'print %s.hash'", EnvUtil.rubybin, str.dump)
+    3.times {
+      IO.popen(cmd, "rb") {|o|
+        a << o.read
+      }
+    }
+    assert_not_equal([str.hash.to_s], a.uniq)
+  end
 end

--- a/version.c 2011-12-19 03:22:43.000000000 +0000
+++ b/version.c 2011-12-29 18:18:58.000000000 +0000
@@ -46,7 +46,7 @@
     rb_define_global_const("RUBY_PATCHLEVEL", INT2FIX(RUBY_PATCHLEVEL));
 
     snprintf(description, sizeof(description),
-             "ruby %s (%s %s %d) [%s], MBARI 0x%x, Ruby Enterprise Edition %s",
+             "ruby %s (%s %s %d) [%s], MBARI 0x%x, Ruby Enterprise Edition %s (with hash random)",
              RUBY_VERSION, RUBY_RELEASE_DATE, RUBY_RELEASE_STR,
              RUBY_RELEASE_NUM, RUBY_PLATFORM,
              STACK_WIPE_SITES, REE_VERSION);
--
1.7.5.4

You can view it or download it from github.

Disclaimer: This is provided as is, no guarantees are provided, etc.



PlanetMySQL Voting: Vote UP / Vote DOWN

ruby mysql2 gem and MySQL 5.5 client library failure

Октябрь 6th, 2011
My team was provisioning a ruby on rails application on a new Operating System with different ruby and client libraries and we came across a strange bug.

Intermittently we were seeing the failures in the application with the following messages in the passenger log:

NoMethodError: undefined method `each' for nil:NilClass

and

(NoMethodError) "undefined method `each' for nil:NilClass"

/usr/local/rvm/gems/ruby-1.9.2-p0/ruby/1.9.1/gems/mysql2-0.2.4/lib/active_record/connection_adapters/mysql2_adapter.rb:628:in `select'
/usr/local/rvm/gems/ruby-1.9.2-p0/ruby/1.9.1/gems/activerecord-3.0.0/lib/active_record/connection_adapters/abstract/database_statements.rb:7:in `select_all'
/usr/local/rvm/gems/ruby-1.9.2-p0/ruby/1.9.1/gems/activerecord-3.0.0/lib/active_record/connection_adapters/abstract/query_cache.rb:54:in `block in select_all'
/usr/local/rvm/gems/ruby-1.9.2-p0/ruby/1.9.1/gems/activerecord-3.0.0/lib/active_record/connection_adapters/abstract/query_cache.rb:68:in `cache_sql'
/usr/local/rvm/gems/ruby-1.9.2-p0/ruby/1.9.1/gems/activerecord-3.0.0/lib/active_record/connection_adapters/abstract/query_cache.rb:54:in `select_all'
/usr/local/rvm/gems/ruby-1.9.2-p0/ruby/1.9.1/gems/activerecord-3.0.0/lib/active_record/base.rb:467:in `find_by_sql'
/usr/local/rvm/gems/ruby-1.9.2-p0/ruby/1.9.1/gems/activerecord-3.0.0/lib/active_record/relation.rb:64:in `to_a'
/usr/local/rvm/gems/ruby-1.9.2-p0/ruby/1.9.1/gems/activerecord-3.0.0/lib/active_record/relation/finder_methods.rb:143:in `all'


We found other people experiencing the same problem here, though the issues page on GitHub didn't state a resolution.

We discovered we were running the MySQL 5.5 client libraries - namely the libmysqlclient 18. mysql2.so was built against  libmysqlclient 18:


[root@host]# ldd ./vendor/bundle/ruby/1.8/gems/mysql2-0.2.13/ext/mysql2/mysql2.so
linux-vdso.so.1 =>  (0x00007fff51788000)
libruby.so.1.8 => /usr/lib64/libruby.so.1.8 (0x00007f47f68e4000)
libmysqlclient.so.18 => /usr/lib64/libmysqlclient.so.18 (0x00007f47f6389000)
libpthread.so.0 => /lib64/libpthread.so.0 (0x00007f47f6165000)
libm.so.6 => /lib64/libm.so.6 (0x00007f47f5ee1000)
librt.so.1 => /lib64/librt.so.1 (0x00007f47f5cd9000)
libdl.so.2 => /lib64/libdl.so.2 (0x00007f47f5ad4000)
libcrypt.so.1 => /lib64/libcrypt.so.1 (0x00007f47f589d000)
libc.so.6 => /lib64/libc.so.6 (0x00007f47f551e000)
libgcc_s.so.1 => /lib64/libgcc_s.so.1 (0x00007f47f5307000)
/lib64/ld-linux-x86-64.so.2 (0x0000003eccc00000)
libfreebl3.so => /usr/lib64/libfreebl3.so (0x00007f47f50aa000)



We are running Centos6 and use the Percona MySQL client libraries, so we downgraded from Percona-Server-shared-55 to Percona-Server-shared-51 to provide the earlier client library.

We gem bundled against an earlier version of the MySQL client libraries (libmysqlclient 16) and we no longer experienced that intermittent failure:


[root@host]# ldd vendor/bundle/ruby/1.8/gems/mysql2-0.2.13/lib/mysql2/mysql2.so
linux-vdso.so.1 =>  (0x00007fff9e7d5000)
libruby.so.1.8 => /usr/lib64/libruby.so.1.8 (0x00007ff304784000)
libmysqlclient_r.so.16 => /usr/lib64/mysql/libmysqlclient_r.so.16 (0x00007ff30436e000)
libz.so.1 => /lib64/libz.so.1 (0x00007ff304159000)
librt.so.1 => /lib64/librt.so.1 (0x00007ff303f51000)
libpthread.so.0 => /lib64/libpthread.so.0 (0x00007ff303d33000)
libcrypt.so.1 => /lib64/libcrypt.so.1 (0x00007ff303afc000)
libnsl.so.1 => /lib64/libnsl.so.1 (0x00007ff3038e3000)
libm.so.6 => /lib64/libm.so.6 (0x00007ff30365e000)
libdl.so.2 => /lib64/libdl.so.2 (0x00007ff30345a000)
libc.so.6 => /lib64/libc.so.6 (0x00007ff3030db000)
libgcc_s.so.1 => /lib64/libgcc_s.so.1 (0x00007ff302ec4000)
/lib64/ld-linux-x86-64.so.2 (0x0000003a52200000)
libfreebl3.so => /lib64/libfreebl3.so (0x00007ff302c67000)



I have yet to discover the specific issue, which I think is an asynchronous caching race condition involving libmysqlclient 18 and the mysql2 gem, but I wanted to share my experience with the wider community to save others from going down the same rabbit hole I went down today.



PlanetMySQL Voting: Vote UP / Vote DOWN

DbCharmer 1.7.0 Release: Rails 3.0 Support and Forced Slave Reads

Сентябрь 1st, 2011

This week, after 3 months in the works, we’ve finally released version 1.7.0 of DbCharmer ruby gem – Rails plugin that significantly extends ActiveRecord’s ability to work with multiple databases and/or database servers by adding features like multiple databases support, master/slave topologies support, sharding, etc.

New features in this release:

  • Rails 3.0 support. We’ve worked really hard to bring all the features we supported in Rails 2.X to the new version of Rails and now I’m proud that we’ve implemented them all and the implementation looks much cleaner and more universal (all kinds of relations in rails 3 work in exactly the same way and we do not need to implement connection switching for all kinds of weird corner-cases in ActiveRecord).
  • Forced Slave Reads functionality. Now we can have models with slaves that are not used by default, but can be turned on globally (per-controller, per-action or in a block). This is a new feature that brings our master/slave routing capabilities to a really new level – we can now use it for really mission-critical models on demand and not be afraid of breaking major functionality of our applications by switching them to slave reads.
  • Lots of changes were made in the structure of our code and tests to make sure it would be much easier for new developers to understand DbCharmer internals and make changes in its code.

Along with the new release we’ve got a brand new web site. You can find much better, cleaner and, most importantly, correct documentation for the library on the web site. We’ll be adding more examples, will try to add more in-depth explanation of our core functions, etc.

If you have any questions about the release, feel free to ask them in our new mailing list: DbCharmer Users Group.

For more updates in our releases, you can follow @DbCharmer on Twitter.



PlanetMySQL Voting: Vote UP / Vote DOWN

On Password Strength

Август 11th, 2011

XKCD (as usual) makes a very good point – this time about password strength, and I reckon it’s something app developers need to consider urgently. Geeks can debate the exact amount of entropy, but that’s not really the issue: insisting on mixed upper/lower and/or non-alpha and/or numerical components to a user password does not really improve security, and definitely makes life more difficult for users.

So basically, the functions that do a “is this a strong password” should seriously reconsider their approach, particularly if they’re used to have the app decide whether to accept the password as “good enough” at all.


PlanetMySQL Voting: Vote UP / Vote DOWN

Speaking at NovaRUG on Thursday

Август 16th, 2010

I’ll be joining the NovaRUG (Northern Virginia Ruby Users’ Group) on Thursday to talk about MySQL performance. See their blog for the details and how to RSVP.

Related posts:

  1. Speaking at MySQL Meetup in Northern Virginia
  2. Speaking at EdUI Conference 2009
  3. Speaking at Surge 2010
  4. Speaking at CPOSC 2009
  5. Speaking at Enterprise LAMP Summit 2009


PlanetMySQL Voting: Vote UP / Vote DOWN

Ever tried calling a win32ole (COM) object from Ruby’s DRb?

Июнь 26th, 2010

Before we get started here, let me state that I am using Ruby 1.9.1 (I refuse to look back!), and that I have not tested this solution on Ruby 1.8.6, but it should work there as well, though I may have some 1.9-isms in my code. Should be easy enough to spot.

I am working on writing an application in Ruby that can talk to a Windows application that has an ActiveX COM Automation object exposed. Ruby is basically the wrapper so that I can access the application from the Linux side of the world. So, I am using Ruby’s DRb to bridge those worlds because, after all, I am the Linux Bloke!

Well, as you may have guessed, I ran into problems with this approach. I simply could not call the COM objects from a call initiated with DRb, though I could call them directly just fine. After scratching my head a bit, I figured it out.

The win32ole module that runs on the Windows side of the world in Ruby only wants to run in the same thread that it was started in. win32ole is simply not thread-safe, and this has to do in large part with how ActiveX works under Windows. No need to delve into the gory details as we want code that works already!

DRb is very much all about threads. The DRb Server runs in a separate thread, and threads are launched each time a DRb request comes in. Threads abound like crazy! After all, it is very clear that the implementation of DRb was based, in part, on the Java threading model and Java’s RMI. But we knew that. We know that Ruby Threads parrot Java Threads. And I’ve done a lot of work with Java Threads in the past and almost feel a bit of “déjà vu” in working with them in Ruby. Oh the days…

But I digress.

We have a major problem here. How do we get around it, without having to throw out DRb and doing something funky like writing some custom RPC bit just to make Windows happy?

Well, as you may have guessed, the Linux Bloke created the very solution you need!! Funnel!

Funnel works by wrapping a given object with a “meta” object that can then be called from any thread. All the calls are actually queued up and processed by the thread the target object wants to run in. The calling threads block until the target object returns the call, and the result objects are stuffed somewhere so that the calling thread can find them.

It’s all very transparent and you need not do anything special — much. You will need to call process_funnel_messages() in the funneled thread. And you may do this once in which case process_funnel_messages() will loop forever and never return, or you can call it at regular intervals if you need to do other processing in that same thread.

You, of course, can use Funnel anywhere you need to funnel calls from multiple threads to a single thread to access something that is not inherently thread-safe or thread-aware.

The downloadable code is posted here:

?Download funnel.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
=begin rdoc
Funnel created by Fred Mitchell (LinuxBloke.com) on 2010-06-05                         
 
=Funnel -- funnel calls to an object to a specific thread that created said object.    
 
With some systems, like win32ole, the system basically wants to run on the same thread
the system was started on. To facilitate that need in a multi-threaded environment,
we create the Funnel.                                                                  
 
The Funnel wrapper on an object will basically intercept all method calls and
funnel those calls to the wrapped object in the thread it was created in. The
caller thread will basically block until the Funnel calls the target object's method
and will be given, as a return, the result object of that call.                        
 
The Funnel thread will basically sit in a loop waiting for something to come in,
and wake up to process the entries, then go back to sleep until the next ones come
in.                                                                                    
 
Any exceptions (or errors) that occur in the Funnel shall be
thrown to the caller thread, as though the exception took place in that thread.        
 
This code is released under the GPLv3.                                                 
 
=end                         
 
# Funnel routes method calls made from any thread to the thread that created
# the wrapper, so that an object which must only be used from one thread can
# still be driven from many.
#
# Usage: mix in Funnel (or extend an object with it), call +wrap+ in the
# thread that owns the target object, hand the returned Wrapper to other
# threads, and call +process_funnel_messages+ from the owning thread.
module Funnel
  # Proxy around a target object. Every method the proxy does not define
  # itself is queued for the owning thread instead of being run in the
  # calling thread.
  class Wrapper
    # target:: the object whose methods will be invoked by the owning thread.
    #
    # The thread that constructs the Wrapper is recorded as the owning
    # thread. Its pending-call queue is stored under its :methQueue key and
    # is created only if it does not exist yet, so several wrappers built in
    # the same thread share one queue.
    def initialize(target)
      @targetOb = target
      @targetThr = Thread.current
      @targetThr[:methQueue] ||= []
    end

    # Queues the call (target, method name, calling thread, arguments) for
    # the owning thread, then waits until a result has been stored under the
    # calling thread's :methResult key and returns it.
    #
    # An exception raised by the target method is raised in the calling
    # thread by process_funnel_messages, which also ends the wait.
    def method_missing(meth, *parms)
      # Sentinel marking "no answer yet"; replaced by the owning thread.
      Thread.current[:methResult] = :nothing_yet
      @targetThr[:methQueue] << [@targetOb, meth, Thread.current, parms]

      # The answer may already be there by the time we get here, so test
      # before yielding.
      while Thread.current[:methResult] == :nothing_yet
        # Nudge the owning thread if it went to sleep on an empty queue.
        @targetThr.wakeup if @targetThr.stop?
        Thread.pass
      end
      Thread.current[:methResult]
    end
  end

  # Called by the original (owning) thread to process queued calls.
  #
  # loop_forever:: when true (the default) this method never returns: it
  #                sleeps via Thread.stop whenever the queue is empty and
  #                resumes when a caller wakes it. When false, it handles at
  #                most one queued call and returns, so it can be invoked
  #                periodically from a thread that has other work to do.
  def process_funnel_messages(loop_forever = true)
    begin
      meth = nil
      queue = Thread.current[:methQueue]
      # shift yields nil on an empty queue, which leaves meth nil.
      (ob, meth, thr, parms) = queue.shift unless queue.nil?
      unless meth.nil?
        begin
          thr[:methResult] = ob.send(meth, *parms)
          thr.run
        rescue
          # Hand the failure to the caller as if it had happened there.
          thr.raise($!)
        end
      else
        Thread.stop if loop_forever
      end
    end while loop_forever
  end

  # Convenience constructor: wraps +target+ so that calls on the returned
  # Wrapper are executed by the thread that called +wrap+.
  def wrap(target)
    Wrapper.new(target)
  end
end

And here is an example of its use:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
require 'funnel'
include Funnel

# Stand-in for an object that must only ever be used from one thread.
class StupidThreadUnsafeThing
  # Prints the thread that is actually executing the call.
  def callme
    puts "*** I've been called. My thread is"
    p Thread.current
    puts
  end
end

stut = StupidThreadUnsafeThing.new

# This is the easy to use wrapper
fstut = wrap stut

stut.callme

Thread.new do
  callers = []
  10.times do |i|
    sleep 1
    callers << Thread.new {
      puts "XXX #{i} calling stut from thread"
      p Thread.current
      fstut.callme
    }
  end
  # Wait until every caller has received its answer; exiting straight away
  # could end the program before the last call(s) were serviced.
  callers.each { |t| t.join }
  exit
end

# Here we loop forever processing messages.
# Optionally, we could call this repeatedly
# to process messages by using a parameter of
# "false".
process_funnel_messages

This code is fairly straightforward, as you can see. If there is enough interest, I’ll consider turning this into a gem.


PlanetMySQL Voting: Vote UP / Vote DOWN

DbCharmer – Rails Can Scale!

Апрель 17th, 2010

Back in November 2009 I was working on a project to port the Scribd.com code base to Rails 2.2 and noticed that some old plugins we were using in 2.1 had been abandoned by their authors. Some of them were just removed from the code base, but one needed a replacement – that was an old plugin called acts_as_readonlyable that helped us to distribute our queries among a cluster of MySQL slaves. There were some alternatives, but we didn’t like them for one reason or another, so we decided to create our own ActiveRecord plugin that would help us scale our databases out. That’s the story behind the first release of DbCharmer.

Today, six months after the first release of the gem, we’ve moved it to gemcutter (which is now the official gem hosting service) and we’re already at version 1.6.11. The gem has been downloaded more than 2000 times. There are at least 10 large users that rely on this gem to scale their products out. And (this is the most exciting part) we’ve added tons of new features to the product.

Here are the main features added since the first release:

  • Much better multi-database migrations support including default migrations connection changing.
  • We’ve added ActiveRecord associations preload support that makes it possible to move eager loading queries to the same connection where your finder queries go to.
  • We’ve improved ActiveRecord’s query logging feature and now you can see what connections your queries executed on (and yes, all those improvements are colorized :-) ).
  • We’ve added the ability to temporarily remap any ActiveRecord connection to any other connection for a block of code (really useful when you need to make sure all your queries go to some non-default slave and you do not want to mess with all your models).
  • The most interesting change: we’ve implemented some basic sharding functionality in ActiveRecord which currently is being used in production in our application.

As you can see now DbCharmer helps you to do three major scalability tasks in your Rails projects:

  1. Master-Slave clusters to scale out your Rails models reads.
  2. Vertical sharding by moving some of your models to separate (maybe even dedicated) servers while still using AR associations.
  3. Horizontal sharding by slicing your models data to pieces and placing those pieces into different databases and/or servers.

So, if you haven’t checked DbCharmer out yet and you’re working on some large Rails project that is (or is going to be) facing scalability problems, go read the docs, download/install the gem and prove to them that Rails CAN scale!



PlanetMySQL Voting: Vote UP / Vote DOWN

Liveblogging at Confoo: [not just] PHP Performance by Rasmus Lerdorf

Март 11th, 2010

Most of this stuff is not PHP specific, and Python or Ruby or Java or .NET developers can use the tools in this talk.

The session on joind.in, with user comments/feedback, is at http://joind.in/talk/view/1320.

Slides are at http://talks.php.net/show/confoo10

“My name is Rasmus, I’ve been around for a long time. I’ve been doing this web stuff since 1992/1993.”

“Generally performance is not a PHP problem.” Webservers not config’d, no expire headers on images, no favicon.

Tools: Firefox/Firebug extension called YSlow (developed by yahoo) gives you a grade on your site.

Google has developed the Firefox/Firebug pagespeed tool.

Today Rasmus will pick on wordpress. He checks out the code, then uses Siege to do a baseline benchmark — see the slide for the results.

Before you do anything else install an opcode cache like APC. Wordpress really likes this type of caching, see this slide for the results. Set the timezone, to make sure conversions aren’t being done all the time.

Make sure you are cpu-bound, NOT I/O bound. Otherwise, speed up the I/O.

Then strace your webserver process. There are common config issues that you can spot in the strace output. grep for ENOENT, which shows you “No such file or directory” errors.

AllowOverride None to turn off .htaccess for every directory, just read settings once from your config file….(unless you’re an ISP).

Make sure DirectoryIndex is set appropriately, watch your include_path. All this low-hanging fruit has examples on the common config issues slide.

Install pecl/inclued and generate a graph – here is the graph image (I have linked it because you really want to zoom in to the graph…)

In strace output check the open() calls. Conditional includes, function calls that include files, etc. need runtime context before knowing what to open. In the example, every request checks to see if we have the config file, once we have config’d we can get rid of that stuff. Get rid of all the conditionals and hard-code “include wp-config.php”. Examples are on the slide.

His tips to change:
Conditional config include in wp-load.php (as just mentioned)
Conditional did-header check in wp-blog-header.php
Don’t call require_wp_db() from wp-settings.php
Remove conditional require logic from wp_start_object_cache

Then check strace again, now all Rasmus sees is theming and translations, which he decided to keep, because that’s the good benefit of Wordpress – Performance is all about costs vs. flexibility. You don’t want to get rid of all of your flexibility, but you want to be fast.

Set error_reporting(-1) in wp-settings.php to catch all warnings — warnings slow you down, so get rid of all errors. PHP error handling is very slow, so getting rid of errors will make you faster.

The slide of warnings that wordpress throws.

Look at all C-level calls made, using callgrind, which sits under valgrind, a CPU emulator used for debugging. See the image of what callgrind shows.

Now dive into the PHP executor, by installing XDebug.

Check out xhprof. Facebook open sourced this about a year ago; it’s a PECL extension. The output is pretty cool, try it on your own site; Rasmus does show you how to use it. It shows you functions sorted from the most expensive to the least expensive.

For example, use $_SERVER[REQUEST_TIME] instead of time(). Use pconnect() if MySQL can handle the amount of webserver connections that will be persistent, etc.

After you have changed a lot of the stuff above, benchmark again with siege to see how much faster you are. In this case there is not much gained so far.

So keep going….the blogroll is very slow — Rasmus gets rid of it by commenting out in the sidebar.php file. I’d like to see something to make it “semi-dynamic” — that is, make it a static file that can be re-generated, since you might want the blogroll but links are not changed every second…..

At this point we’re out of low-hanging fruit.

HipHop is a PHP to C++ converter & compiler, including a threaded, event-driven server that replaces apache. Rasmus’ slide says “Wordpress is well-suited for HipHop because it doesn’t have a lot of dynamic runtime code. This is using the standard Wordpress-svn checkout with a few tweaks.”

Then, of course, benchmark again.

The first time you compile Wordpress with HipHop, you give it a list of files to add to the binary, it will complain about php code that generate file names, so you do have to fix that kind of stuff. There’s a huge mess of errors the first time you run it (”pages and pages”), and Rasmus had to patch HipHop (and Wordpress) but the changes in HipHop have been put back into HipHop, so you should be good for the most part.

Check out the errors: lots of them reveal logic errors like $foo."bar" instead of $foo.="bar" and $foo="bar" instead of $foo=="bar" in an if statement, which of course is a nice way to find those logic errors in your own code.

(Wordpress takes in a $user_ID argument and immediately initializes a global $user_ID variable, which overwrites the argument passed in, so you can change the name of the argument passed in….)

You can also get rid of some code, things that check for existence of the same thing more than once. So it will take a bit of tweaking, but it’s worth it.

There are limitations to HipHop, for example:

  • It doesn’t support any of the new PHP 5.3 language features
  • Private properties don’t really exist under HipHop. They are treated as if they are protected instead.
  • You can’t unset variables. unset will clear the variable, but it will still be in the symbol table.
  • eval and create_function are limited
  • Variable variables $$var are not supported
  • Dynamic defines won’t work: define($name,$value)
  • get_loaded_extensions(), get_extension_funcs(), phpinfo(), debug_backtrace() don’t work
  • Conditional and dynamically created include filenames don’t work as you might expect
  • Default unix-domain socket filename isn’t set for MySQL so connecting to localhost doesn’t work

and HipHop does not support all extensions — see the list Rasmus has of extensions HipHop supports.

Then Rasmus showed an example using Twit (which he wrote) including the benchmarks. He shows that you can see what’s going on, like 5 MySQL calls on the home page and what happens when you don’t have a favicon.ico (in yellow).

In summary, “performance is all about architecture”, “know your costs”.

Be careful: some tools (like valgrind and xdebug) are not something you want to put on production systems. You could instead capture production traffic and replay it on a dev/testing box, but “you just have to minimize the differences and do your best”.


PlanetMySQL Voting: Vote UP / Vote DOWN

Liveblogging at Confoo: [not just] PHP Performance by Rasmus Lerdorf

Март 11th, 2010

Most of this stuff is not PHP specific, and Python or Ruby or Java or .NET developers can use the tools in this talk.

The session on joind.in, with user comments/feedback, is at http://joind.in/talk/view/1320.

Slides are at http://talks.php.net/show/confoo10

“My name is Rasmus, I’ve been around for a long time. I’ve been doing this web stuff since 1992/1993.”

“Generally performance is not a PHP problem.” Webservers not config’d, no expire headers on images, no favicon.

Tools: Firefox/Firebug extension called YSlow (developed by yahoo) gives you a grade on your site.

Google has developed the Firefox/Firebug pagespeed tool.

Today Rasmus will pick on wordpress. He checks out the code, then uses Siege to do a baseline benchmark — see the slide for the results.

Before you do anything else install an opcode cache like APC. Wordpress really likes this type of caching, see this slide for the results. Set the timezone, to make sure conversions aren’t being done all the time.

Make sure you are cpu-bound, NOT I/O bound. Otherwise, speed up the I/O.

Then strace your webserver process. There are common config issues that you can spot in the strace output. grep for ENOENT, which shows you “No such file or directory” errors.

AllowOverride None to turn off .htaccess for every directory, just read settings once from your config file….(unless you’re an ISP).

Make sure DirectoryIndex is set appropriately, watch your include_path. All this low-hanging fruit has examples on the common config issues slide.

Install pecl/inclued and generate a graph – here is the graph image (I have linked it because you really want to zoom in to the graph…)

In strace output check the open() calls. Conditional includes, function calls that include files, etc. need runtime context before knowing what to open. In the example, every request checks to see if we have the config file, once we have config’d we can get rid of that stuff. Get rid of all the conditionals and hard-code “include wp-config.php”. Examples are on the slide.

His tips to change:
Conditional config include in wp-load.php (as just mentioned)
Conditional did-header check in wp-blog-header.php
Don’t call require_wp_db() from wp-settings.php
Remove conditional require logic from wp_start_object_cache

Then check strace again, now all Rasmus sees is theming and translations, which he decided to keep, because that’s the good benefit of Wordpress – Performance is all about costs vs. flexibility. You don’t want to get rid of all of your flexibility, but you want to be fast.

Set error_reporting(-1) in wp-settings.php to catch all warnings — warnings slow you down, so get rid of all errors. PHP error handling is very slow, so getting rid of errors will make you faster.

The slide of warnings that wordpress throws.

Look at all C-level calls made, using callgrind, which sits under valgrind, a CPU emulator used for debugging. See the image of what callgrind shows.

Now dive into the PHP executor, by installing XDebug.

Check out xhprof. Facebook open sourced this about a year ago; it’s a PECL extension. The output is pretty cool, try it on your own site; Rasmus does show you how to use it. It shows you functions sorted from the most expensive to the least expensive.

For example, use $_SERVER[REQUEST_TIME] instead of time(). Use pconnect() if MySQL can handle the amount of webserver connections that will be persistent, etc.

After you have changed a lot of the stuff above, benchmark again with siege to see how much faster you are. In this case there is not much gained so far.

So keep going….the blogroll is very slow — Rasmus gets rid of it by commenting out in the sidebar.php file. I’d like to see something to make it “semi-dynamic” — that is, make it a static file that can be re-generated, since you might want the blogroll but links are not changed every second…..

At this point we’re out of low-hanging fruit.

HipHop is a PHP to C++ converter & compiler, including a threaded, event-driven server that replaces apache. Rasmus’ slide says “Wordpress is well-suited for HipHop because it doesn’t have a lot of dynamic runtime code. This is using the standard Wordpress-svn checkout with a few tweaks.”

Then, of course, benchmark again.

The first time you compile Wordpress with HipHop, you give it a list of files to add to the binary, it will complain about php code that generate file names, so you do have to fix that kind of stuff. There’s a huge mess of errors the first time you run it (”pages and pages”), and Rasmus had to patch HipHop (and Wordpress) but the changes in HipHop have been put back into HipHop, so you should be good for the most part.

Check out the errors: lots of them reveal logic errors like $foo."bar" instead of $foo.="bar" and $foo="bar" instead of $foo=="bar" in an if statement, which of course is a nice way to find those logic errors in your own code.

(Wordpress takes in a $user_ID argument and immediately initializes a global $user_ID variable, which overwrites the argument passed in, so you can change the name of the argument passed in….)

You can also get rid of some code, things that check for existence of the same thing more than once. So it will take a bit of tweaking, but it’s worth it.

There are limitations to HipHop, for example:

  • It doesn’t support any of the new PHP 5.3 language features
  • Private properties don’t really exist under HipHop. They are treated as if they are protected instead.
  • You can’t unset variables. unset will clear the variable, but it will still be in the symbol table.
  • eval and create_function are limited
  • Variable variables $$var are not supported
  • Dynamic defines won’t work: define($name,$value)
  • get_loaded_extensions(), get_extension_funcs(), phpinfo(), debug_backtrace() don’t work
  • Conditional and dynamically created include filenames don’t work as you might expect
  • Default unix-domain socket filename isn’t set for MySQL so connecting to localhost doesn’t work

and HipHop does not support all extensions — see the list Rasmus has of extensions HipHop supports.

Then Rasmus showed an example using Twit (which he wrote) including the benchmarks. He shows that you can see what’s going on, like 5 MySQL calls on the home page and what happens when you don’t have a favicon.ico (in yellow).

In summary, “performance is all about architecture”, “know your costs”.

Be careful: some tools (like valgrind and xdebug) are not something you want to put on production systems. You could instead capture production traffic and replay it on a dev/testing box, but “you just have to minimize the differences and do your best”.


PlanetMySQL Voting: Vote UP / Vote DOWN

Reusing models outside of Rails

Январь 6th, 2010

If you have done a good job of building your Rails models, you may find that they are helpful for your non-Rails system maintenance and such. They may even be necessary to reuse if you follow the Rails approach of using ActiveRecord validations (rather than database RI, i.e. referential-integrity constraints) to preserve the integrity of your data.


Or you may just find yourself rewriting the same code again and again, and want all that good railsiness to make it easier to write and maintain. Personally I find myself in some instance of ./script/console as often as irb just so I can get the activesupport helper methods ( 4.days.from_now and such) that many rails developers are surprised to find are not actually a standard part of ruby.


So, the good news is it is easy to reuse rails code outside of rails.


Let's say you want to do some data manipulation (reporting, loading, scrubbing, etc) in your rails db, and want to use your models to do it. A few imports in your ruby script gets the necessary environment in place:


require 'rubygems'
require 'yaml'
require 'active_record'
require 'logger'


and a few more will load up your models (note: they're probably not in the same location as mine, unless you are also working on an app called 'seweb' in your home dir):


# Root of the Rails application whose models are being reused.
PROJECT_HOME = "#{ENV['HOME']}/seweb/"

# Load each model file straight from the application's app/models directory.
%w[sales_rep organization team].each do |model_name|
  require "#{PROJECT_HOME}/app/models/#{model_name}.rb"
end


Then connect to the appropriate database (note I'm connecting to the development environment - can you guess how I'd connect to 'test' or 'production'?), with rails logging enabled:


# Send ActiveRecord's log output (including the SQL it runs) to STDERR.
ActiveRecord::Base.logger = Logger.new( STDERR )
# Read the application's database settings. YAML.load_file opens and closes
# the file itself, so no file handle is left open.
db_config = YAML.load_file( File.join( PROJECT_HOME, 'config', 'database.yml' ) )
# Connect using the settings of the "development" environment.
ActiveRecord::Base.establish_connection( db_config["development"] )


And you are good! If you are using a transactional database (such as my personal favorite, MySQL with InnoDB), you can make nice transaction wrappers for your work thusly:


# Create (or complete) a sales rep and its related records atomically: if any
# step fails, everything done inside the block is rolled back.
ActiveRecord::Base.transaction do
  rep_name = 'Kyllin D. Quota'
  begin
    rep = SalesRep.find_or_initialize_by_name( rep_name )
    # create the component parts
    if rep.changed?
      rep.organization = Organization.find_or_create_by_name 'APAC'
      rep.team = Team.find_or_create_by_name 'Enterprise'
      rep.save!
    end
  rescue StandardError
    # ActiveRecord::Rollback makes the enclosing transaction roll back and is
    # then swallowed by the transaction block instead of propagating further.
    # StandardError (not Exception) is rescued so that interrupts and exit
    # requests are not turned into a silent rollback.
    raise ActiveRecord::Rollback, "Invalid record for #{rep_name}"
  end
end


Pow. You get your rails sugar, rails validations, rails logging. Are you happy? Why yes, yes you are.


PlanetMySQL Voting: Vote UP / Vote DOWN