Here is a parser for SQL which builds an AST. The demo code at the end includes examples and a pretty-printer.
#!/usr/bin/env lux
-- Parse SQL select's, modeled after MkSQL by Gordon McMillan
-- 20/02/2001 jcw@equi4.com
-- globals used in this code for now are: input, token, and pos
-- grab the next token off the input string
function tokenize()
local b,e,t=strfind(input,'^%s*(%w+)',pos) -- numbers and identifiers
if not b then
b,e,t=strfind(input,"^%s*(%d+%.%d*)",pos) -- floating point numbers
if not b then
b,e,t=strfind(input,"^%s*('[^'\n']*')",pos) -- quoted literals
if not b then
b,e,t=strfind(input,"^%s*([<>=][=>]?)",pos) -- comparisons
if not b then
b,e,t=strfind(input,"^%s*(.?)",pos) -- other operators
end
end
end
end
pos=e+1
--print('GOT',t)
return t
end
-- only grab a token if there is no pending one
function peek()
if not token then token=tokenize() end
if token~='' then return token end
end
-- grab and consume the next token if it matches
function match(s)
if peek()==s then token=nil; return s end
end
-- make sure the next token matches and consume it
function require(s)
assert(match(s),'require '..s..', got '..(token or '<eof>'))
end
-- utility for parsing binary operators
function binary(fun,a,b)
local r=fun()
while peek()==a or token==b do r={op=match(token);r,fun()} end
return r
end
function p_stmtlist()
local r={op='STMTLIST'}
repeat tinsert(r,p_stmt()) until not match(';')
assert(not peek(),token)
return r
end
function p_stmt()
if match('SELECT') then return p_query() end
assert(nil)
end
function p_query()
local r=p_subquery()
while 1 do
local t=match('EXCEPT') or match('INTERSECT') or match('UNION')
if not t then break end
require('SELECT')
r={op=t;r,p_subquery()}
end
if match('ORDER') then require('BY'); r.o=p_orderbyspec() end
return r
end
function p_subquery()
local r={op='SELECT',f=p_select_list()}
require('FROM')
r.t=p_table_list()
if match('WHERE') then r.w=p_condition() end
if match('GROUP') then require('BY'); r.g=p_groupspec() end
if match('HAVING') then r.h=p_condition() end
return r
end
function p_select_list()
local r={op='SELECTLIST'}
repeat
local r2=match('*')
if not r2 then
r2=p_expr()
if match('.') then
require('*')
r2={op='ALLCOLSOF';r2}
else
match('AS')
if peek() and strfind(token,'[a-z]') then --XXX names lowercase
r2={op='ALIAS';r2,match(token)}
end
end
end
tinsert(r,r2)
until not match(',')
return r
end
function p_table_list()
local r={op='TABLELIST'}
repeat
local r2={op='TABLE';match(peek())}
match('AS')
if peek() and strfind(token,'[a-z]') then --XXX names lowercase
r2.a=match(token)
end
tinsert(r,r2)
until not match(',')
return r
end
function p_groupspec()
local r={op='GROUPSPEC'}
repeat tinsert(r,p_col_ref()) until not match(',')
return r
end
function p_col_ref()
local r=match(peek())
if match('.') then r={op='COLREF';r,match(peek())} end
return r
end
function p_condition()
return binary(p_condition_term,'OR')
end
function p_condition_term()
return binary(p_condition_factor,'AND')
end
function p_condition_factor()
if match('NOT') then return {op='NOT';p_condition_primary()} end
return p_condition_primary()
end
function p_condition_primary()
if match('(') then
local r=p_condition()
require(')')
return r
end
if match('EXISTS') then
require('(')
require('SELECT')
local q=p_subquery()
require(')')
return {op='EXISTS';q}
end
local r=p_expr()
local t=peek()
if t=='<' or t=='<=' or t=='>' or t=='>=' or t=='=' or t=='<>' then
return {op='CMP',r=match(token);r,p_expr()}
end
local f=match('NOT')
if match('BETWEEN') then
local e=p_expr()
require('AND')
return {op='BETWEEN',n=f;r,e,p_expr()}
end
if match('IN') then
require('(')
local l=p_literal_list()
require(')')
return {op='IN',n=f;r,e,l}
end
assert(not f)
return r
end
function p_expr()
return binary(p_expr_term,'+','-')
end
function p_expr_term()
return binary(p_expr_factor,'*','/')
end
function p_expr_factor()
if match('COUNT') then
require('(')
local r=match('*') or p_expr()
require(')')
return {op='COUNT';r}
end
local t=match('AVG') or match('SUM') or match('TOTAL')
if t then require('(')
local r=p_expr()
require(')')
return {op=t;r}
end
if match('(') then
local r=p_expr()
require(')')
return r
end
return p_col_ref()
end
function p_literal_list()
local r={op='LITERALLIST'}
repeat tinsert(r,match(peek())) until not match(',')
return r
end
function p_orderbyspec()
local r={op='ORDERBYSPEC'}
repeat
local r2=p_col_ref()
local t=match('ASC') or match('DESC')
if t then r2={op='ORDEREL',d=t;r2} end
tinsert(r,r2)
until not match(',')
return r
end
-----------------------------------------------------------------------------
do
function treedump(t,s)
local s=s or ''
if type(t)=='table' then
print(s..t.op)
s=strrep(' ',strlen(s))
for i=1,getn(t) do
treedump(t[i],s..i..'. ')
end
for i,v in t do
if type(i)~='number' and i~='n' and i~='op' then
treedump(v,s..i..' ')
end
end
else
print(s..t)
end
end
examples={} -- examples from Gadfly by Aaron Watters
examples[1]=[[
SELECT * FROM frequents
WHERE drinker = 'norm'
]]
examples[2]=[[
SELECT drinker FROM likes
UNION
SELECT drinker FROM frequents
]]
examples[3]=[[
SELECT drinker FROM likes
EXCEPT
SELECT drinker FROM frequents
]]
examples[4]=[[
SELECT * FROM frequents
WHERE drinker>'norm' OR drinker<'b'
]]
examples[5]=[[
SELECT *
FROM frequents AS f, serves AS s
WHERE f.bar = s.bar
]]
examples[6]=[[
SELECT *
FROM frequents AS f, serves AS s
WHERE f.bar = s.bar AND
NOT EXISTS(
SELECT l.drinker, l.beer
FROM likes l
WHERE l.drinker=f.drinker AND s.beer=l.beer)
]]
examples[7]=[[
SELECT SUM(quantity) AS total, AVG(quantity),
COUNT(*) AS tcount, total/tcount
FROM serves
]]
examples[8]=[[
SELECT bar, SUM(quantity) AS total, AVG(quantity),
COUNT(*) AS beers, total/beers
FROM serves
WHERE beer <> 'bud'
GROUP BY bar
HAVING total>500 OR beers>3
ORDER BY 2, bar
]]
examples[9]=[[
SELECT l.drinker, l.beer, COUNT(*), SUM(l.perday*f.perweek)
FROM likes l, frequents f
WHERE l.drinker=f.drinker
GROUP BY l.drinker, l.beer
ORDER BY 4 DESC, l.drinker, l.beer
]]
for i=1,getn(examples) do
print(examples[i])
input,pos,token=examples[i],1,nil
treedump(p_stmtlist())
print(strrep('__',35))
end
end
Input example 1:
SELECT * FROM frequents
WHERE drinker = 'norm'
Sample output:
| STMTLIST | 1. SELECT | w CMP | 1. drinker | 2. 'norm' | r = | t TABLELIST | 1. TABLE | 1. frequents | f SELECTLIST | 1. *
Last modified
2001-02-21
2001-02-21
(216.232.136.19)
Note: you are looking at
the snapshot of an old wiki
- much of this information
is likely to be very outdated
