=begin Copyright (c) 2008, Trampoline Systems All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Trampoline Systems nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. =end require "rubygems" require "sequel" DB = Sequel.mysql('test', :host => 'localhost') ["people", "movies", "ratings", "movie_statistics", "movie_correlations"].each do |sql| DB << "drop table if exists #{sql} cascade" end DB << " create table people( id int primary key ) " DB << " create table movies( id int primary key ) " DB << " create table ratings( person int references people (id), movie int references movies (id), rating float not null, unique key (person, movie) ) " [DB[:ratings], DB[:people], DB[:movies]].each{|t| t.delete} DB[:people].insert_multiple((0..1000).map{|i| {:id => i}}) DB[:movies].insert_multiple((0..1000).map{|i| {:id => i}}) DB[:ratings].insert_multiple( (0..50000).map{|i| [rand(1000), rand(1000)]}.uniq.map{|x, y, z| { :person => x, :movie => y, :rating => rand }}) DB << " create table movie_statistics( movie int primary key references movies(id), mean float, stddev float ) " DB << " insert into movie_statistics (movie, mean) select movie, sum(rating) / (select count(*) from people) mean from ratings group by movie " DB << " update movie_statistics join ( select movie, sum(rating * rating) sum from ratings group by movie ) summed_ratings on summed_ratings.movie = movie_statistics.movie set stddev = sqrt(sum / (select count(*) from people) - mean * mean) " DB << " create table movie_correlations( movie1 int references movies(id), movie2 int references movies(id), rho float, unique key (movie1, movie2), key (movie1), key (movie2) )" start = Time.now DB << " insert into movie_correlations ( movie1, movie2, rho ) select sf.m1, sf.m2, (sf.sum / (select count(*) from people) - stats1.mean * stats2.mean ) -- covariance / (stats1.stddev * stats2.stddev) from ( select r1.movie m1, r2.movie m2, sum(r1.rating * r2.rating) sum from ratings r1 join ratings r2 on r1.person = r2.person group by m1, m2 order by null ) sf join movie_statistics stats1 on stats1.movie = sf.m1 join movie_statistics stats2 on stats2.movie = sf.m2 " puts Time.now - start