Organising complex SQL queries in Rails

Vlado Cingel
vlado@cingel.hr

Active Record Query Interface

Makes it super simple to organise SQL queries using Scopes and Relations


          class Post
            has_many :comments

            scope :published, -> { where(published: true) }
            scope :not_published, -> { where(published: false) }
            scope :from_category, ->(category) { where(category: category) }
            scope :latest, -> { limit(10) }
          end

          class Comment
            belongs_to :post

            scope :approved, -> { where(approved: true) }
          end
          
ActiveRecord::Relation returned
Which allow us to:
Chain (call further methods) on them

          Post.published
          Comment.approved

          Post.from_category("ruby").latest.merge(Post.published)
          Comment.approved.joins(:post).where(post: Post.published)

          pots = Post.find(1)
          post.comments.approved
          post.comments.approved.where(approved_at: (1.week.ago..Time.zone.today))
          
Perform calculations on them

          posts = Post.published.from_category("ruby")

          posts.count
          posts.average("words_count")
          posts.min("words_count")
          posts.max("words_count")
          posts.sum("words_count")
          
Create, Update or Delete using them

          Post.not_published.delete_all

          Post.not_published.update_all(published: true)

          post.comments.approved.create(content: "New approved comment")
          
Lazy load and Cache them

          # app/controllers/posts_controller.rb
          @posts = Post.published

          # app/views/posts/index.html.erb
          <% cache(@posts) do %>
            <% @posts.each do |post| %>
              

<%= post.title %>

... <% end %> <% end %>

But...

What to do when ActiveRecord just isn't enough?


          class Clinic
            has_many :case_numbers
          end

          class CaseNumber
            belongs_to :clinic
          end
          

Iteration 1: Plain SQL


          class ClinicsSqlBuilder
            QUERY = <<-SQL
              WITH relevant_clinics AS (
                SELECT clinics.id, clinics.name, SUM(case_numbers.count) AS cases_count FROM clinics INNER JOIN case_numbers ON case_numbers.clinic_id = clinics.id WHERE case_numbers.code LIKE :code GROUP BY clinics.id
              ), stats AS (
                SELECT PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY cases_count DESC) AS quartile_2, PERCENTILE_CONT(0.50) WITHIN GROUP (ORDER BY cases_count DESC) AS quartile_1 FROM relevant_clinics
              )
              SELECT id, name, cases_count, CASE ... END AS RANK FROM relevant_clinics ORDER BY cases_count DESC
            SQL

            def sql_for_clinics_matching(code)
              ActiveRecord::Base.sanitize_sql_array([QUERY, code: "#{code}%"])
            end
          end
          

Iteration 1: Plain SQL


          sql_builder = ClinicsSqlBuilder.new

          Clinic.find_by_sql(sql_builder.sql_for_clinics_matching("F4"))
          

          class ClinicsSqlBuilder
            RELEVANT_CLINICS_CTE = <<-SQL
              WITH relevant_clinics AS (
                SELECT clinics.id, clinics.name, SUM(case_numbers.count) AS cases_count FROM clinics INNER JOIN case_numbers ON case_numbers.clinic_id = clinics.id WHERE case_numbers.code LIKE :code GROUP BY clinics.id
              ), stats AS (
                SELECT PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY cases_count DESC) AS quartile_2, PERCENTILE_CONT(0.50) WITHIN GROUP (ORDER BY cases_count DESC) AS quartile_1 FROM relevant_clinics
              )
            SQL

            def sql_for_clinics_matching(code)
              sql = <<-SQL
                #{RELEVANT_CLINICS_CTE}
                SELECT id, name, cases_count, CASE ... END AS RANK FROM relevant_clinics ORDER BY cases_count DESC
              SQL
              ActiveRecord::Base.sanitize_sql_array([sql, code: "#{code}%"])
            end

            def count_sql_for_clinics_matching(code)
              sql = <<-SQL
                #{RELEVANT_CLINICS_CTE}
                SELECT count(*) FROM relevant_clinics ORDER BY cases_count DESC
              SQL
              ActiveRecord::Base.sanitize_sql_array([sql, code: "#{code}%"])
            end
          end
          

Iteration 1: Plain SQL


          sql_builder = ClinicSqlBuilder.new

          Clinic.find_by_sql(sql_builder.sql_for_clinics_matching("F4"))
          Clinic.count_by_sql(sql_builder.sql_for_clinics_matching("F4"))
          

Iteration 1: Plain SQL

Chainable 😞
Calculations 😞
Create, Update, Delete 😞
Lazy Load 😞
Cache 😞

Iteration 1: Plain SQL

Pros:
Simple and readable
Easy to debug
Probably the best option for big reports and exports
Cons:
Not flexibile
Not database agnostic

Iteration 2: Arel

  • Relation Algebra for Ruby
  • Generate AST and builds SQL query
  • Knows nothing about your models and database
  • Does not read or write the data

Iteration 2: Arel


          Clinic.arel_table # => Arel::Table
          Clinic.arel_table[:name] # => Arel::Attributes::Attribute

          Clinic.arel_table.project(Arel.star) # => Arel::SelectManager
          Clinic.all.arel # => Arel::SelectManager

          clinics_table = Clinic.arel_table
          clinics_table.where(clinics_table[:name].eq("Rebro"))
          # SELECT * FROM clinics WHERE clinics.name = 'Rebro'
          

Iteration 2: Arel


          class Clinic
            has_many :case_numbers

            scope :matches_medical_topic, -> (q) {
              joins(:case_numbers)
                .where(CaseNumber.arel_table[:code].matches("#{q}%"))
            }
          end

          Clinic.matches_medical_topic("f4")
          # => ActiveRecord::Relation
          # SELECT "clinics".* FROM "clinics"
          #   INNER JOIN "case_numbers" ON "case_numbers"."clinic_id" = "clinics"."id"
          #   WHERE "case_numbers"."code" ILIKE 'f4%'
          

Iteration 2: Arel


          class Clinic
            has_many :case_numbers
            scope :matches_medical_topic, -> (q) { joins(:case_numbers).where(CaseNumber.arel_table[:code].matches("#{q}%")) }

            def self.search(q)
              relevant_clinics_table = Arel::Table.new(:relevant_clinics)
              relevat_clinics_expression = matches_medical_topic(q)
                .select(arel_table[:id], arel_table[:name], CaseNumber.arel_table[:count].sum.as("cases_count"))
                .group("clinics.id")
                .arel

              stats_table = Arel::Table.new(:stats)
              stats_expression = select("PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY cases_count DESC) AS quartile_2")
                .select("PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY cases_count DESC) AS quartile_1")
                .from(relevant_clinics_table)
                .arel

              statement = arel_table
                .project(
                  relevant_clinics_table[:id],
                  relevant_clinics_table[:name],
                  relevant_clinics_table[:cases_count],
                  "CASE WHEN cases_count > (SELECT quartile_2 FROM stats) THEN 'High' WHEN cases_count > (SELECT quartile_1 FROM stats) THEN 'Average' ELSE 'Low' END as rank"
                )
                .with([
                  Arel::Nodes::As.new(relevant_clinics_table, relevat_clinics_expression),
                  Arel::Nodes::As.new(stats_table, stats_expression)
                ])
                .from(relevant_clinics_table)
                .order(relevant_clinics_table[:cases_count].desc)

              find_by_sql(statement.to_sql)
            end
          end

          Clinic.search("f4")
          

Iteration 2: Arel


          case_numbers = CaseNumber.arel_table
          cte_table = Arel::Table.new(:cool_case_numbers)
          cte_table_expression = case_numbers.where(case_numbers[:code].eq("cool"))

          clinics = Clinic.arel_table
          clinic_ids = clinics
            .project(clinics[:id])
            .join(cte_table).on(clinics[:id].eq(cte_table[:clinic_id]))
            .with(Arel::Node::As.new(cte_table, cte_expression))

          Clinic.where(clinics[:id].in(clinic_ids))
          

Iteration 2: Arel

Chainable 😐
Calculations 😐
Create, Update, Delete 😐
Lazy Load 😐
Cache 😐

Iteration 2: Arel

Pros:
Super Powers
Cons:
Noisy
Requires some workarounds to return ActiveRecord::Relation
Undocumented

Iteration 3: Extend Rails

How hard can it be to add CTE support to Rails?

.with is already in Arel ...

Iteration 3: Extend Rails


          module ActiveRecord
            module Querying
              delegate :with, to: :all
            end

            class Relation
              def with(opts, *rest)
                return self if opts.blank?

                spawn.with!(opts, *rest)
              end

              def with!(opts, *rest)
                self.with_values += [opts] + rest
                self
              end

              def with_values
                @values[:with] || []
              end

              def with_values=(values)
                raise ImmutableRelation if @loaded
                @values[:with] = values
              end

              def build_arel(aliases)
                arel = super(aliases)
                build_with!(arel) if @values[:with]
                arel
              end

              def build_with!(arel)
                return if with_values.empty?

                with_statements = with_values.map do |with_value|
                  with_value.map do |name, expression|
                    table = Arel::Table.new(name)
                    select = Arel::Nodes::SqlLiteral.new("(#{expression.to_sql})")
                    Arel::Nodes::As.new(table, select)
                  end
                end

                arel.with with_statements
              end
            end
          end
          

Iteration 3: Extend Rails


          class Clinic
            has_many :case_numbers
            scope :matches_medical_topic, -> (q) {
              joins(:case_numbers).where(CaseNumber.arel_table[:code].matches("#{q}%"))
            }

            def self.search(q)
              relevant_clinics = matches_medical_topic(q)
                .select(arel_table[:id], arel_table[:name], CaseNumber.arel_table[:count].sum.as("cases_count"))
                .group(arel_table[:id])

              stats = select(
                  "PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY cases_count DESC) AS quartile_2",
                  "PERCENTILE_CONT(0.50) WITHIN GROUP (ORDER BY cases_count DESC) AS quartile_1"
                )
                .from("relevant_clinics")

              with(relevant_clinics: relevant_clinics, stats: stats)
                .from("relevant_clinics clinics")
                .order("cases_count DESC")
            end
          end
          

Iteration 3: Extend Rails

Chainable 😀
Calculations 😀
Create, Update, Delete 😀
Lazy Load 😀
Cache 😀

iteration 2: Extend Rails

Pros:
Readable
Flexible
All ActiveRecord::Relation goodies
Cons:
Monkey patch

Other options?

Database views


          CREATE VIEW cool_clinics AS (
            SELECT sum(count) ...
          )
          SELECT * FROM cool_clinics
          
  • Stored set of queries against underlying tables
  • Great for reports, full-text search across multiple models
  • No out-of-the-box support in Rails
  • https://github.com/scenic-views/scenic

Sequel

Sequel

  • Small core + plugin system
  • Superior to ActiveRecord
  • Simplicity of ActiveRecord and power of Arel
  • Doesn't come with the Rails
  • No schema file
  • https://twin.github.io/ode-to-sequel/

THANK YOU!

QUESTIONS?

... and answers maybe :)

vlado@cingel.hr