tutorial/0000755000076500000240000000000010342565237012636 5ustar jcwstaff00000000000000tutorial/blocked_ordered.py0000755000076500000240000000136610327247041016321 0ustar jcwstaff00000000000000import metakit, random, time st = metakit.storage("test.mk", 1) # create a blocked view and # order the view on the first integer value vw = st.getas("large_view[_B[key:I,data:B]]").blocked().ordered(1) t1 = time.time() for i in range(1000000): vw.append((i, str(i))) if i % 10000 == 0: # commit every 10000 entries print i st.commit() t2 = time.time() st.commit() print (t2-t1), "seconds to load", len(vw), "entries" # now test lookup times lookup = [] size = len(vw) for i in range(1000): lookup.append(int(random.random()*size)) t1 = time.time() for i in lookup: vw.find(key=i) t2 = time.time() print (t2-t1), "seconds to lookup up", len(lookup), "entries" print "or", (t2-t1)/len(lookup), "seconds per lookup" tutorial/../metakit/e4s.gif0000744000076500000240000000122507006423724014016 0ustar jcwstaff00000000000000GIF89aa#111ccc11cc,a#PI8ͻ`(`hjJj ǪTxξ0m$w8ϗB% ڴ)P DB v'+H9kmo vzyE{RRIj(u43mr'}zwdwI :13m'|z;ePcFF~9 ȂlЗ&ֹwÖ:hK[* Y0-9(@`4Zr/*Lض*xD]:I# 4 ; Ǚє E4TG6F^X1I"$D$oѦU-w^R L~"^S bK,PɅ7TEaf= 10) resultvw = vw.remapwith(indexvw) for row in resultvw: assert row.id >= 10 indexvw = vw.filter(lambda row: row.id >= 10) vw.remove(indexvw) # make sure that there are only 10 entries assert len(vw) == 10 tutorial/flatten.py0000755000076500000240000000102010327247041014632 0ustar jcwstaff00000000000000import metakit storage = metakit.storage() vw = storage.getas("person[name:S,affiliation[group:S]") vw.append(name="Frank") vw.append(name="Bill") vw[0].affiliation = [{"group":"Physics"},{"group":"Chemistry"}] vw[1].affiliation = [{"group":"Biology"},{"group":"Theatre"}] metakit.dump(vw) print "table using two loops" for row in vw: for affiliation in row.affiliation: print row.name, affiliation.group print print "table using flatten" for row in vw.flatten(vw.affiliation): print row.name, 
row.group tutorial/hash.py0000755000076500000240000000042610327247041014131 0ustar jcwstaff00000000000000import metakit storage = metakit.storage() view = storage.getas("test[key:S,value1:F,value2:B]") hashvw = storage.getas("__test_hash__[_H:I,_R:I]") view = view.hash(hashvw, 1) view.append(("king", 2.0)) view.append(("king", 4.0)) view.append(("King", 3.0)) metakit.dump(view) tutorial/index_test.py0000755000076500000240000000165410327247041015360 0ustar jcwstaff00000000000000"""Indicies Test""" import metakit st = metakit.storage() vw = st.getas("test[a:I]") # populate rows for i in range(10000): vw.append(i) indices = vw.indices(vw) assert indices[0].index != -1, "Failed retrieving index of view" # test the slicing operator subset = vw[0:10] indices = vw.indices(subset) print "This table should not be populated with -1's" metakit.dump(indices) # the indices should not be -1!!! if indices[0].index == -1: print "Failed retrieving index of subset" else: print "vw[0:10] works okay with vw.indices" print # test the select operator subset = vw.select(0,9) indices = vw.indices(subset) print "This table should not be populated with -1's" metakit.dump(indices) # the indices should not be -1!!! 
assert indices[0].index != -1, "Failed retrieving index of subset" if indices[0].index == -1: print "Failed retrieving index of subset" else: print "vw.select(lo,hi) works ok with vw.indices" tutorial/join.py0000755000076500000240000000072410327247041014146 0ustar jcwstaff00000000000000import metakit storage = metakit.storage() vw = storage.getas("test1[a:I,b:S]") vw2 = storage.getas("test2[a:I,c:S]") vw.append((0, "view1")) vw2.append((0, "view2")) vw2.append((1, "view2")) metakit.dump(vw.join(vw2, vw.a)) storage = metakit.storage() vw = storage.getas("test1[a:I,b:S]") vw2 = storage.getas("test2[aa:I,c:S]") vw.append((0, "view1")) vw2.append((0, "view2")) vw2.append((1, "view2")) temp = vw2.rename('aa', 'a') metakit.dump(vw.join(temp, vw.a)) tutorial/ordered.py0000755000076500000240000000033210327247041014626 0ustar jcwstaff00000000000000import metakit storage = metakit.storage() view = storage.getas("test[key:S,value1:F]").ordered() for i in range(10): if i % 2 == 0: view.append(("king", i)) else: view.append(("KING", i)) metakit.dump(view) tutorial/../metakit/python.gif0000644000076500000240000000614507167413265014660 0ustar jcwstaff00000000000000GIF89aW  !!!"""###$$$%%%&&&'''((()))***+++,,,---...///000111222333444555666777888999:::;;;<<<===>>>???@@@AAABBBCCCDDDEEEFFFGGGHHHIIIJJJKKKLLLMMMNNNOOOPPPQQQRRRSSSTTTUUUVVVWWWXXXYYYZZZ[[[\\\]]]^^^___```aaabbbcccdddeeefffggghhhiiijjjkkklllmmmnnnooopppqqqrrrssstttuuuvvvwwwxxxyyyzzz{{{|||}}}~~~!,W޽ϟ?s߿_>}'߿{{ϟ?߿}߿|߿޿{;yx/ۿ|]C=o߿ׯ_~߾߿~ׯ_o<~G ߿r Kݿ{߿~ׯ_?߿kgϞ>ϟ?y߿  ޿o@G'߿ׯ?ϟ?t_q>}ӧO_K_vK¹$@A (ga@ ~ϝx>}ӧO>|Ç?z_S߿/=|߿ ߿@ݣq(_7|߿߿o/X8p` `;Gp9?hn P`)<P 'lP8pA 4hРn0 @ ZPP> (Mw0`48F ( @ @`1|@*_w"4P7|,Y. ѐ$I;  Hz"]5 8'-언+1Ͽ?4(`((p_>߿&( @Zo߿d 4E|};HeܿQo] o߄8_ܹq5s?0߿}#j q18{=pvvP,:矷N o-K ޿v>rq?i*p>+ӮRg@[^  pFFR WŃ6p|@epw Metakit for Python

Metakit for Python

The structured database which fits in the palm of your hand

[ Terminology | Installation | Getting started | Mk4py Reference ]

Buzzwords - Metakit is an embeddable database which runs on Unix, Windows, Macintosh, and other platforms. It lets you build applications which store their data efficiently, in a portable way, and which will not need a complex runtime installation. In terms of the data model, Metakit takes the middle ground between RDBMS, OODBMS, and flat-file databases - yet it is quite different from each of them.

Technology - Everything is stored variable-sized yet with efficient positional row access. Changing an existing datafile structure is as simple as re-opening it with that new structure. All changes are transacted. You can mix and match software written in C++, Python, and Tcl. Things can't get much more flexible...

Python - The extension for Python is called "Mk4py". It provides a lower-level API for the Metakit C++ core extension than an earlier version of this interface, and uses SCXX by Gordon McMillan as C++ glue interface.

Mk4py 2.4.9.2 - is a final/production release. The homepage points to a download area with pre-compiled shared libraries for Unix, Windows, and Macintosh. The Metakit source distribution includes this documentation, the Mk4py C++ source code, a "MkMemoIO.py" class which provides efficient and fail-safe I/O (therefore also pickling) using Metakit memo fields, and a few more goodies.

Changes since 2.01 - the MK core has changed substantially:

License and support - Metakit 2 and up are distributed under the liberal X/MIT-style open source license. Commercial support is available through an Enterprise License. See the license page for details.

Credits - Are due to Gordon McMillan for not stopping at the original Mk4py and coming up with a more Pythonic interface, and to Christian Tismer for pushing Mk4py way beyond its design goals. Also to GvR and the Python community for taking scripting to such fascinating heights...

Updates - The latest version of this document is at https://www.equi4.com/metakit/python.html


Terminology

There are several ways to say the same thing, depending on where you're coming from. For example, the terms table, list, collection, array, sequence, and vector all denote a more or less similar concept. To help avoid confusion, Metakit uses a simple (but hopefully precise) terminology.

The terms adopted by Metakit can be summarized as follows:

A few more comments about the semantics of Metakit:


Installation

  1. Download the latest version from https://www.equi4.com/pub/download.html
  2. On Unix, rename the appropriate compiled extension to "Mk4py.so" (on Win/Mac, use the corresponding file)
  3. Place the Mk4py extension as well as the "metakit.py" wrapper somewhere on Python's module search path,
    such as in the site-packages directory (or just leave it in ".")
  4. Do a small test, by running "demo.py". If all is well, you should get some self-explanatory output


Getting started

Create a database:
import metakit
db = metakit.storage("datafile.mk",1)
Create a view (this is the Metakit term for "table"):
vw = db.getas("people[first:S,last:S,shoesize:I]")
Add two rows (this is the Metakit term for "record"):
vw.append(first='John',last='Lennon',shoesize=44)
vw.append(first='Flash',last='Gordon',shoesize=42)
Commit the changes to file:
db.commit()
Show a list of all people:
for r in vw: print r.first, r.last, r.shoesize
Show a list of all people, sorted by last name:
for r in vw.sort(vw.last): print r.first, r.last, r.shoesize
Show a list of all people with first name 'John':
for r in vw.select(first='John'): print r.first, r.last, r.shoesize


Mk4py Reference

  1. Module functions
  2. Storage objects
  3. View objects
  4. Derived views
  5. View operations
  6. Mapping views
  7. Rowref objects
  8. Property objects

1. Module functions

These functions live at the module level. You can use them as described below after executing the following preamble:
     import metakit
print metakit.version

SYNOPSIS

db = metakit.storage()
Create an in-memory database (can't use commit/rollback).  Details...  Notes...
db = metakit.storage(file)
Use a specified file object to build the storage on
db = metakit.storage(name,mode)
Open file, create if absent and mode is non-zero. Open read-only if mode is 0, r/w if mode is 1 (cannot be shared), or as commit-extend if mode is 2 (in mode 1 and 2, the file will be created if needed).
vw = metakit.view()
Create a standalone view; not in any storage object
pr = metakit.property(type, name)
Create a property (a column, when associated to a view).    Notes...
vw = metakit.wrap(sequence,proplist,byPos=0)
Wraps a Python sequence as a view.  Details..Notes...
metakit.dump(view)
Prints the contents of the view to the screen.  Metakit.dump is your friend, use it liberally.

ADDITIONAL DETAILS
storage- When given a single argument, the file object must be a real stdio file, not a class implementing the file r/w protocol. When the storage object is destroyed (such as with 'db = None'), the associated datafile will be closed. Be sure to keep a reference to it around as long as you use it.

wrap- This call can be used to wrap any Python sequence, it assumes that each item is either a dictionary or an object with attribute names corresponding to the property names. Alternately, if byPos is nonzero, each item can be a list or tuple - they will then be accessed by position instead. Views created in this way can be used in joins and any other view operations.

2. Storage objects

SYNOPSIS
vw = storage.getas(description)
Locate, define, or re-define a view stored in a storage object.   Notes...
vw = storage.view(viewname)
The normal way to retrieve an existing view.
storage.rollback(full=0)
Revert data and structure as was last committed to disk. In commit-aside mode, a "full" rollback reverts to the state of the original file and forgets about the aside file.
After a rollback, your view objects are invalid (use the view or getas methods on your storage object to get them back). Furthermore, after a full rollback, the aside storage is detached from the main storage. Use the aside method on your main storage object to reattach it. If you do not reattach it, further commits will (try to) write to the main storage.
storage.commit(full=0)
Permanently commit data and structure changes to disk. In commit-aside mode, a "full" commit saves the latest state in the original file and clears the aside datafile.
ds = storage.description(viewname='')
The description string is described under getas.  Notes...
vw = storage.contents()
Returns the View which holds the meta data for the Storage.  Notes...
storage.autocommit()
Commit changes automatically when the storage object goes away
storage.load(fileobj)
Replace storage contents with data from file (or any other object supporting read such as sys.stdin or StringIO)   Notes...
storage.save(fileobj)
Serialize storage contents to file (or any other object supporting write such as sys.stdout or StringIO)  Notes...
ADDITIONAL DETAILS
description- A description of the entire storage is returned if no viewname is specified, otherwise just the specified top-level view.

getas- Side-effects: the structure of the view is changed.
Notes: Normally used to create a new View, or alter the structure of an existing one.
A description string looks like:
     "people[name:S,addr:S,city:S,state:S,zip:S]"
That is "<viewname>[<propertyname>:<propertytype>...]"
Where the property type is one of:

I
adaptive integer (becomes Python int)

L
64-bit integer (becomes Python long)

F
C float (becomes Python float)

D
C double (is a Python float)

S
C null terminated string (becomes Python string)

B
C array of bytes (becomes Python string)
Careful: do not include white space in the description string.

In the Python binding, the difference between S and B types is not as important as in C/C++, where S is used for zero-terminated text strings. In Python, the main distinctions are that B properties must be used if the data can contain zero bytes, and that sort order of S (stricmp) and B (memcmp) differ. At some point, Unicode/UTF-8 will also play a role for S properties, so it's best to use S for text.   However, if you are planning on using python's pickle facility it is safest to use the 'B' data type as this supports all pickle modes.  Notes...

Dropping or modifying a view- It may not be obvious at this point, but getas can be called multiple times for the same view.  This is what metakit considers 'restructuring' and it can be done on the fly.  For instance, if you wanted to add a phone number to the people table you simply call getas again as follows:

"people[name:S,addr:S,city:S,state:S,zip:S,phone:S]"

To drop a view, call getas with only the view name:

"people"

3. View objects

View implements sequence (list) methods, including slicing, concatenation etc. They behave as a sequence of "rows", which in turn have "properties". Indexing (getitem) returns a reference to a row, not a copy.  Notes...
     r = view[0]
r.name = 'Julius Caesar'
view[0].name # will yield 'Julius Caesar'
A slice returns a modifiable view which is tied to the underlying view. As a special case, however, you can create a fresh empty view with the same structure as another view with:
     v2 = v[0:0]
Setting a slice changes the view:
     v[:] = [] # empties the view
All columns are described with a metakit Property that indicates the name of the column and the type of the column.  A column's property is available from the view as follows:
     view.name # will yield metakit.Property("S", "name")

View also supports getattr, which also returns a Property.  Views can be obtained from Storage objects: view = db.view('inventory') or from other views (see select, sort, flatten, join, project...) or empty, columnless views can be created: vw = metakit.view()

SYNOPSIS

view.insert(index, obj)
Coerce object to a Row and insert at index in View
ix = view.append(obj)
Object is coerced to Row and added to end of View
view.delete(index)
Row at index removed from View
lp = view.structure()
Return a list of property objects
cn = view.addproperty(fileobj)
Define a new property, return its column position

view.map(func, subset=None)
Apply func to each row of view, or (if subset specified) to each row in view that is also in subset. Func must have the signature "func(row)", and may mutate row. Subset must be a subset of view: e.g. "customers.map(func, customers.select(...))".
rview = view.filter(func)
Return a view containing the indices of those rows satisfying func. Func must have signature "func(row)" and must return a false value to omit the row.  In general, you will use this in conjunction with view.remapwith or view.remove
obj = view.reduce(func,start=0)
Return the result of applying func(row, lastresult) to each row in view.
view.remove(indices)
Remove all rows whose indices are in subset from view. Not the same as view.minus, because unique is not required, and view is not reordered.
rview = view.indices(subset)
Returns a view containing the indices in view of the rows in subset.  The resulting view is suitable for use with view.remapwith or view.remove among others.  Notes...
rview = view.copy()
Returns a copy of the view.
Esoteric methods - if you use these, you know more than I do.
str = view.access(byteprop,rownum,offset,length=0)
Get (partial) byte property contents.
view.modify(byteprop,rownum,string,offset,diff=0)
Store (partial) byte property contents. A non-zero value of diff removes (<0) or inserts (>0) bytes.
n = view.itemsize(prop,rownum=0)
Return size of item (rownum only needed for S/B types). With integer fields, a result of -1/-2/-4 means 1/2/4 bits per value, respectively.


ADDITIONAL DETAILS
addproperty- This adds properties which do not persist when committed. To make them persist, you should use storage.getas(...) when defining (or restructuring) the view.

append- Also supports keyword args (colname=value...).  See insert below.

insert- coercion to a Row is driven by the View's columns, and works for:

dictionaries
(column name -> key)

instances
(column name -> attribute name)

lists
(column number -> list index) - watch out!

4. Derived views

SYNOPSIS
vw = view.select(criteria...)
Return a view which has fields matching the given criteria.  Details...
vw = view.select(low,high)
Return a view with rows in the specified range.  This is similar to vw[low:high+1] except that the result can be used in view.indices.
vw = view.sort()
Sort view in "native" order, i.e. the definition order of its keys.  Keys are specified by using a mapping view.
vw = view.sort(property...)
Sort view using the specified properties.  Details... Notes...
vw = view.sortrev((propall...), (proprev...))
Sort view in specified order, with optionally some properties in reverse.  Details...  Notes...
vw = view.project(property...)
Returns a derived view with only the named columns
ADDITIONAL DETAILS
select- Example selections, returning the corresponding subsets:
     result = inventory.select(shoesize=44)
result = inventory.select({'shoesize':40},{'shoesize':43})
result = inventory.select({},{'shoesize':43})
The derived view is "connected" to the base view. Modifications of rows in the derived view are reflected in the base view.

sort- Example, returning the sorted permutation
     result = inventory.sort(inventory.shoesize)
See notes for select concerning changes to the sorted view

sortrev - Example, sort as follows: shoesize: ascending then shoestyle: descending then shoecolor: ascending
result = inventory.sortrev([inventory.shoesize, inventory.shoestyle, inventory.shoecolor], [inventory.shoecolor])

5. View operations

SYNOPSIS
vw = view.flatten(subprop,outer=0)
Produces one 'flat' view from a nested view  Notes...
vw = view.join(view,property...,outer=0)
Both views must have a property (column) of that name and type
ix = view.find(criteria...,start=0)
Returns the index of the found row, or -1.  Details...
ix = view.search(criteria...)
Binary search (native view order), returns match or insertion point.  Details...
ix, cnt = view.locate(criteria...)
Binary search, returns position and count as tuple (count can be zero).  Details...
vw = view.unique()
Returns a new view without duplicate rows (a set)
vw = view.union(view2)
Returns a new view which is the set union of view and view2
vw = view.intersect(view2)
Returns a new view which is the set intersection of view and view2
vw = view.different(view2)
Returns a new view which is the set XOR of view and view2
vw = view.minus(view2)
Returns a new view which is (in set terms) view - view.intersect(view2)
vw = view.remapwith(view2)
Remap rows according to the first (int) property in view2
vw = view.pair(view2)
Concatenate rows pairwise, side by side
vw = view.rename('oldname', 'newname')
Returns a derived view with one property renamed
vw = view.product(view)
Returns the cartesian product of both views.  Notes...
vw = view.groupby(property..., 'subname')
Groups on specified properties, with subviews to hold groups
vw = view.counts(property..., 'name')
Groups on specified properties, replacing rest with a count field
ADDITIONAL DETAILS
find- view[view.find(firstname='Joe')] is essentially the same as view.select(firstname='Joe')[0] but much faster. Subsequent finds use the "start" keyword: view.find(firstname='Joe', start=3)
In general you should not use view[view.find(...)] directly: on failure find returns -1, and view[-1] is the last row in the view.  Always check the result of view.find or view.search to ensure that it is not -1.

search, locate- You should probably never use these directly unless you are certain that the property you are searching is ordered.  When using mapping views the fast binary searches will occur automatically.

6. Mapping views

Mapping views create wrappers around ordinary views.  These mapping views enhance normal views in various ways.  Mapping views can speed up access to particular data (hash views, ordered views) or can allow a view to hold more data (blocked views).  In addition, blocked views and ordered views can be combined to give a good tradeoff between data access and amount of data stored.

SYNOPSIS
vw = view.hash(mapview,numkeys=1)
Construct a hash mapping based on the first N fields.  Details..Notes...
vw = view.blocked(blockview)
Construct a "blocked" view, which acts as if all segments together form a single large view.  Details...  Notes...
vw = view.ordered(numkeys=1)
Define a view which assumes and maintains sort order, based on the first N fields. When layered on top of a blocked view, this implements a 2-level btree.  Details... Notes...
ADDITIONAL DETAILS

hash- This view creates and manages a special hash map view, to implement a fast find on the key. The key is defined to consist of the first numkeys properties of the underlying view.
The mapview must be empty the first time this hash view is used, so that Metakit can fill it based on whatever rows are already present in the underlying view. After that, neither the underlying view nor the map view may be modified other than through this hash mapping layer. The defined structure of the map view must be "_H:I,_R:I".

This view is modifiable. Insertions and changes to key field properties can cause rows to be repositioned to maintain hash uniqueness.

Careful: when a row is changed in such a way that its key is the same as in another row, that other row will be deleted from the view.
blocked- This view acts like a large flat view, even though the actual rows are stored in blocks, which are rebalanced automatically to maintain a good trade-off between block size and number of blocks.   Use this style of view if you are going to have a view with a great number of records (for example > 250,000). The underlying view must be defined with a single view property named "_B", with the structure of the subview being as needed.

Example: vw = st.getas("myview[_B[id:I,data:B]]").blocked()

If a view is created in this fashion, blocked must always be called to access the data normally.  Blocked views cannot be hashed, although they can be ordered.

ordered- This is an identity view, which has as its only use to inform Metakit that the underlying view can be considered to be sorted on its first numKeys properties. The effect is that view.find() will try to use binary search when the search includes key properties (results will be identical to unordered views, the find will just be more efficient).

This view is modifiable. Insertions and changes to key field properties can cause rows to be repositioned to maintain the sort order.

This view can be combined with view.blocked(), to create a 2-level btree structure.

7. Rowref objects

RowRef allows setting and getting of attributes (columns)
RowRef encapsulates a (view, ndx) tuple.
Normally obtained from a view: rowref = view[33]

8. Property objects

Property has attributes name, id and type. Example: p = metakit.property('I', 'shoesize')
Note that a property is used to describe a column, but it is NOT the same as a column. That is, in a given storage, the property Property('I', 'shoesize') will be unique, (that is, no matter how many instances you create, they will all have the same property.id). But that one property can describe any number of columns, each one in a different view. This is how joins are done, and why "view.sort(view.firstname)" is the same as "view.sort(metakit.property('S','firstname'))".


Advanced Notes and Usage Tips.

Note: metakit.storage() is very useful for learning how to use metakit. 
Simply open a python interpreter and create an in-memory storage object, then play with it :)

>>> db = metakit.storage()

Then you can play with the storage to figure out some metakit commands that you don't quite understand.

Note: storage.getas(...)
You can get into some trouble with getas:
"people[name:S,name:F]"

won't raise any errors for instance and will use the first description of name as a string.

"people[name:S, name:F]"

will create two columns "name" and " name".  The second won't be directly accessible from python.  See this note. 

The two basic rules to follow when creating description strings are:

  1. Don't use spaces or other special characters (use an underscore if necessary, this is common database practice).  In other words "zip_code" not "zip code" and don't use a column named "phone #".  You will be able to retrieve these columns but you will have to go through some shenanigans.
  2. Always start the column name with a letter.

Note: metakit.wrap
Metakit.wrap is a very powerful way of converting python sequences into (temporary) metakit views.  It is a little tricky only because if you are loading from a list of tuples you must use byPos=1 to load the data correctly.  This flag simply states that the item at position X in each row will be loaded as the value of property_list[X].  The example below should make this clear.

import metakit
storage = metakit.storage()

table = [
    (10000 ,8.3,7.1,8.3,8.1),
    (14999,5.5,5.8 ,5.8,6.2),
    (24999 ,11.3,12.7,12.4,13.5),
    (34999,11.9,13.2 ,12.4,13.7),
    (49999,16,2,18.1,16.5 ,17.9),
    (74999 ,20.7,22.7,21.4,21.4),
    (99999,11.6,10.9 ,11.4,10.2),
    (149999,9,6.4,8.6,6.3),
    (199999,2.6,1.5 ,2.1,1.4),
    (200000,2.8,1.5,2.0,1.5),
    ]

headers = ['Income Range',
           'Percent Population In Illinois',
           'Percent Population In Wisconsin',
           'Percent Population In Michigan',
           'Percent Population In Indiana']

properties = [metakit.property('F', h.replace(' ', "_"))
              for h in headers]

# we are using byPos = 1 here since we have the
# input as a list
view = metakit.wrap(table, properties, 1)

metakit.dump(view)
 Income_Range  Percent_Population_In_Illinois  Percent_Population_In_Wisconsin  Percent_Population_In_Michigan  Percent_Population_In_Indiana
 ------------  ------------------------------  -------------------------------  ------------------------------  -----------------------------
      10000.0                   8.30000019073                    7.09999990463                   8.30000019073                  8.10000038147
      14999.0                             5.5                    5.80000019073                   5.80000019073                  6.19999980927
      24999.0                   11.3000001907                    12.6999998093                   12.3999996185                           13.5
      34999.0                   11.8999996185                    13.1999998093                   12.3999996185                  13.6999998093
      49999.0                            16.0                              2.0                   18.1000003815                           16.5
      74999.0                   20.7000007629                    22.7000007629                   21.3999996185                  21.3999996185
      99999.0                   11.6000003815                    10.8999996185                   11.3999996185                  10.1999998093
     149999.0                             9.0                    6.40000009537                   8.60000038147                  6.30000019073
     199999.0                   2.59999990463                              1.5                   2.09999990463                  1.39999997616
     200000.0                   2.79999995232                              1.5                             2.0                            1.5
 ------------  ------------------------------  -------------------------------  ------------------------------  -----------------------------
 Total: 10 rows


Now we can use all the metakit goodness on this view!  It can be joined with any other view, searched, selected and so on.

Note: Metakit properties are case insensitive. 
Actually, most things in metakit are case insensitive.  This can cause problems if you expect "Object" to be different than "object".  Additionally properties can have names that python will not allow as variable names.  When one accesses a column in metakit it usually goes something like this:

>>> view = db.getas(...)
>>> row = view[index]
>>> data = row.first_name

However, "first name" is also a valid metakit property or even "1st_name" neither of which are valid python variable names.  These must be retrieved using python's magic getattr function.

>>> data = getattr(row, "1st_name")

This will raise an AttributeError exception if row doesn't have a column "1st_name".  You can also use getattr as follows:

>>> data = getattr(row, "1st_name", default)

Which will set data to default if row doesn't have a column "1st_name".

You will run into some issues when columns are named the same as view methods.  For instance, if you have a column named "append" view.append will not return the metakit Property, it will return the append method.  In these cases, you can use the properties method of the view to retrieve a python dictionary describing the relevant properties.


>>> vw = db.getas("people[name:S,addr:S,city:S,state:S,zip:S]")
>>> vw.properties()
{'city': Property('S', 'city'), 'state': Property('S', 'state'), 'name': Property('S', 'name'), 'zip': Property('S', 'zip'), 'addr': Property('S', 'addr')}

Don't be scared away by this complexity, if you are making and using your OWN metakit storage, you can avoid all of this.  This really is only useful when using someone else's storage that you don't have control over and is only added here for the sake of completeness.

Note: storage.contents()
This is one of the neater aspects of metakit.  storage.contents() returns a view which is the structure of the entire database.  Each table in this view is represented as a metakit property of type "V".  Remember that "V" indicates a view or subview type.  This is one of those cases where an example is worth more than this description.

import metakit
st = metakit.storage()

vw = st.getas("test[a:S,b:S,c:S]")
metakit.dump(st.contents())

test 
 ------
 0 rows
 ------
 Total: 1 rows

print st.contents().properties()
{'test': Property('V', 'test')}

So, if we want to iterate through all the tables or views in the database:

for tablename in st.contents().properties():
      vw= st.view(tablename)

Just to prove that all the tables or views are part of the contents view, we could use the more esoteric:

contents = st.contents()
row = contents[0]
for tablename in contents.properties():
      vw= getattr(row, tablename) # same thing as row.<tablename>


Note: storage.save and storage.load
There is a lot of magic in the load and save operations.  For example, metakit usually doesn't reclaim disk space when tables are dropped or rows are removed from views.  The freed disk space will gradually be reused as new data is added.  Saving a storage to a new file will optimize disk usage.  This can be useful for databases where lots of data is dropped.

file = open("newdatabase.mk", "wb")
storage.save(file)
file.close()
storage = metakit.storage("newdatabase.mk", 1)

storage.save can also be used to serialize a metakit storage for transport.  This is normally done with the StringIO or cStringIO class.  In the following snippet, server.send and client.get are left as exercises for the reader :)

try: import cStringIO as StringIO
except ImportError: import StringIO
# server
file = StringIO.StringIO()
storage.save(file)
server.send(file.getvalue())
# client
data = client.get()
file = StringIO.StringIO(data)
storage.load(file)

If you are writing your own socket or html server protocol, the metakit serialized format knows how big it is.  From various emails with Coen Siegerink:

How do I get the storage size from the serialized dump?

It's in bytes 4..7 of the stream (one day to become 3..7), a big-endian int.  This normally also holds for a MK datafile, btw.  So the way to do this is read 8 bytes, determine size, read size-8, put the two pieces back together, then un-serialize. 

How can I ship a subset of a storage?

W.r.t. the storage format, there is a trick which may come in handy.  Consider:
    - ...
    - commit changes
    - restructure, deleting rows and views
    - optionally also add views and some info
    - serialize (to file or in-memory)
    - rollback
    - ...

The above has the effect of "extracting" data from a datafile, and using it to "ship" a subset.
Example Code.


Note: Row references. 
Try to keep row references around as long as possible.  This is especially true when writing GUI applications.  Every time you index a view, you will go through the mechanics of finding the row in the database and returning it as a python object.  While this is a very quick operation, the lookups still add up.  Note that this is true for most python operations when using lists.  So use:

>>> row = view[index]
>>> shoe = row.shoe
>>> shoesize = row.shoesize

instead of:

>>> shoe = view[index].shoe
>>> shoesize = view[index].shoesize


Note: indices
Mk4py uses index views all over the place.  You can consider an indexed view as the result of many view.select calls.  Just like each call to view.select or view.find returns the matching index in a view, an index view contains a whole bunch of matching indices.

Many operations return an indexed view: view.filter, view.indices.  In general, you will use an indexed view with view.remapwith or view.remove

Note: view.filter
view.filter is really wonderfully powerful.  Most of the powerful queries will be created using a combination of this, view.join and view.select. 
To generate a derived view from view.filter use view.remapwith.

Example: return the subview where all id's are greater than or equal to  10
indexvw = vw.filter(lambda row: row.id >= 10)
resultvw = vw.remapwith(indexvw)

Example: remove all rows where the id is greater than or equal to 10
indexvw = vw.filter(lambda row: row.id >= 10)
vw.remove(indexvw)

Example code.

Note: view.indices
view.indices has one annoying bit.  One would expect that:

subset = view[10:20]
indicies = view.indices(subset)

would return the proper indices view, but all rows have an index of -1 which is fairly bad news, since -1 is the index of the last row in the view and calls to things like view.remove will happily remove the last row, multiple times!
The proper way to get this type of subset is:

subset = view.select(10,19)
indices = view.indices(subset)

Note the non-pythonic view.select(10,19) instead of view.select(10,20) as view.select includes the last row where view[10:20] does not.
Here is some test code.

view.indices works just great with derived views of the types generated by view.select or view.sort.

Note: derived views
select and sort are called derived views because they map back to another view.  Most of the time these can be considered as identical to the base view in that when a row is deleted from a derived view it is also deleted from the base view.  However, in some cases Mk4py cannot make the necessary connection to the base view.  For instance, this occurs when sorting a mapping view or selecting from a sorted view.  In these cases a derived view will be returned as a read only view.  Read only views cannot be modified! However, all is not lost!  In these cases you can generate a modifiable derived view using view.indices and view.remapwith.

Here is some example code.

Note: flattened view
Flattened views are useful when viewing views or tables with subviews.  For example, consider the following table:
vw = st.getas("person[name:S,affiliation[group:S]")

If you wanted to iterate through everybody's affiliations, you would have to use two loops:
for row in vw:
     for affiliation in row.affiliation:
          print row.name, affiliation.group

alternatively you could create a flattened view
for row in vw.flatten(vw.affiliation):
     print row.name, row.group

Example code.


Note: view.join
A joined view combines two separate views on a given property.  These two views are joined when the property in each view has the same value.

import metakit
storage = metakit.storage()
vw = storage.getas("test1[a:I,b:S]")
vw2 = storage.getas("test2[a:I,c:S]")
vw.append((0, "view1"))
vw2.append((0, "view2"))
vw2.append((1, "view2"))
metakit.dump(vw.join(vw2, vw.a))
 a  b      c   
 -  -----  -----
 0  view1  view2
 -  -----  -----
 Total: 1 rows

Notice that the output doesn't contain the row in vw2 where a is 1.  This is because it doesn't exist in vw.  The output of a join is a read only view and cannot be modified.

What if I want to join two views with different property names?
Use view.rename to rename one of the view columns.  For instance (see the bold text below):

import metakit
storage = metakit.storage()
vw = storage.getas("test1[a:I,b:S]")
vw2 = storage.getas("test2[aa:I,c:S]")
vw.append((0, "view1"))
vw2.append((0, "view2"))
vw2.append((1, "view2"))
temp = vw2.rename('aa', 'a')
metakit.dump(vw.join(temp, vw.a))
 a  b      c   
 -  -----  -----
 0  view1  view2
 -  -----  -----
 Total: 1 rows

What if there are properties in both views with the same name and different types?
Example, if you join the views "test[a:I,b:F]" with "test2[a:I,b:S]" what happens to the b attribute?  This is essentially undefined, you can either get the b:F column or the b:S column depending on  whether you are joining test to test2 or test2 to test.  The safest bet is to use view.rename to rename one of the views' b property.

Note: view.product
The cartesian product of two views is fairly straightforward, it creates a view that combines all rows of view1 with all rows of view2.  In essence the operation is as follows:

for row1 in view1:
    for row2 in view2:
         # do something with row1 and row2

Why do this?  This is useful when combining two views in complex fashions.  Here is an SQL-style example
select * from view1, view2 where view1.a > view2.c
What this does is select all rows from view1 and view2 where the row in view1.a is greater than the row in view2.c.  The looping version would be:
for row1 in view1:
    for row2 in view2:
         if row1.a > row2.c:
            result.append((row1, row2))

the view.product version would be :
tmp = view1.product(view2)
indices = tmp.filter(lambda row: row.a > row.c)
result = tmp.remapwith(indices)

view.product has many of the same caveats as view.join when dealing with properties in both views that have the same names.

Note: view.hash
Hashing views can be confusing at first.  Like other mapping views, hash views are controlled through another view that manages all of the gory details of maintaining a hash.  Here is the basic hash recipe:

view = storage.getas("test[key:S,value1:F,value2:B]")
hashvw = storage.getas("__test_hash__[_H:I,_R:I]")
view = view.hash(hashvw, 1)

If you follow this recipe you will hardly ever go wrong.  You must do this every time you want to access your hashed view!  Notice three things:
  1. The original view is replaced with the hash view (created with view.hash).  This is because the original view should never be touched after this!  Replacing the original view with the hash view ensures that this will never happen.
  2. In general, I name the hash view "__%s_hash__"%viewname  This helps me ensure that I always have a unique hash name.
  3. storage.view("test") doesn't know about the hashed view so don't use this with hashed views!  Use the recipe above.
You can create hashes on multiple keys, in the above example using:
view = view.hash(hashvw, 2)
would create a unique key using the first two properties described in the string passed to storage.getas.  In this case they are of type (string, float).  Note that just like python dictionaries, there can only be one row with the same key.

Note: view.blocked
From the mailing list:
I'll use plain English, and let others come up with accurate Python:

* instead of defining a view "blah[a:I,b:S,c:D]", define blah[_B[a:I,b:S,c:D]]
* in other words, don't define a view of rows, but a view of views of rows
* when you open the view, replace:
    view = storage.view("blah")
  with
    view = storage.view("blah").blocked()
* or you can use getas, just make sure the structure is as above
* in other words, don't just use the raw view but pass it through blocked()
* that's it

You cannot mix things.  When blocked, never access the unblocked view.

You cannot convert data as is, the only way to do so is to copy all data in.  In C++ there is a call to insert one view into another (compatible) one, but I think in Python you'll have to copy row by row.

Implementation details https://www.equi4.com/mkblocked.html

Python example:
import metakit, random, time
st = metakit.storage("test.mk", 1)
# create a blocked view and
# order the view on the first integer value
vw = st.getas("large_view[_B[key:I,data:B]]").blocked().ordered(1)

t1 = time.time()
for i in range(1000000):
    vw.append((i, str(i)))
    if i % 10000 == 0:
        # commit every 10000 entries
        print i
        st.commit()
t2 = time.time()
st.commit()
print (t2-t1), "seconds to load", len(vw), "entries"


# now test lookup times
lookup = []
size = len(vw)
for i in range(1000):
    lookup.append(int(random.random()*size))

t1 = time.time()
for i in lookup:
    vw.find(key=i)
t2 = time.time()
print (t2-t1), "seconds to lookup up", len(lookup), "random entries"
print "or", (t2-t1)/len(lookup), "seconds per lookup"


Output from my 2Ghz Pentium 4 running windows 2000
46.3960000277 seconds to load 1000000 entries
0.0600000619888 seconds to lookup up 1000 random entries
or 6.00000619888e-005 seconds per lookup
>>>

Note: view.ordered
Unlike view.hash, an ordered view can have more than one entry with the same key.  Also unlike view.hash, ordered uses a case insensitive ordering, i.e. "king" and "KING" compare as equal keys when ordering (both rows are still kept, as the output below shows).
import metakit
storage = metakit.storage()
view = storage.getas("test[key:S,value1:F]").ordered()

for i in range(10):
    if i % 2 == 0: view.append(("king", i))
    else: view.append(("KING", i))

metakit.dump(view)
 key   value1
 ----  ------
 king     0.0
 KING     1.0
 king     2.0
 KING     3.0
 king     4.0
 KING     5.0
 king     6.0
 KING     7.0
 king     8.0
 KING     9.0
 ----  ------
 Total: 10 rows


© 2003 Coen Siegerink <info@equi4.com> tutorial/remove.py0000755000076500000240000000073110327247041014502 0ustar jcwstaff00000000000000"""Indicies Test""" import metakit st = metakit.storage() vw = st.getas("test[a:I]").ordered() # populate rows for i in range(10000): vw.append(i) indices = vw.indices(vw) assert indices[0].index != -1, "Failed retrieving index of view" subset = vw[0:10] indices = vw.indices(subset) print "This table should not be populated with -1's" metakit.dump(indices) # the indices should not be -1!!! assert indices[0].index != -1, "Failed retrieving index of subset" tutorial/select.py0000755000076500000240000000036210327247041014464 0ustar jcwstaff00000000000000"""Select sort and derived views""" import metakit st = metakit.storage() vw = st.getas("test[a:I,b:S,c:F]").ordered() # populate rows for i in range(100): vw.append(a=1, b=str(i), c=float(i)) vw = vw.sort() print vw.select(a=1,c=1.0) tutorial/select_sort.py0000755000076500000240000000105410327247041015532 0ustar jcwstaff00000000000000"""Select sort and derived views""" import metakit st = metakit.storage() vw = st.getas("test[a:I,b:S]").ordered() # populate rows for i in range(100): vw.append(a=i) # When select or sort are used on ordered views the result # is returned in a read only view!!! 
derived = vw.sort() try: derived[0].a = 20 except TypeError: print "caught TypeError when trying to modify read only view" # We can generate a modifiable view using vw.indices and # vw.remapwith subset = vw.indices(derived) derived = vw.remapwith(subset) derived[0].a = 20 tutorial/serialization.py0000755000076500000240000000206410327247041016063 0ustar jcwstaff00000000000000"""Metakit serialization example""" import metakit try: import cStringIO as StringIO except: import StringIO st = metakit.storage() vw1 = st.getas("table_that_I_do_not_want[a:S,b:S,c:S]") vw2 = st.getas("real_deal[id:I,number:F]") for c in "ABCDEFGHIJKLMNOP": vw1.append(c) for i in range(100): vw2.append((i, float(i))) # this is an in-memory table so we can't commit # we'll just save the state for later # this could have been done with st.commit file = StringIO.StringIO() st.save(file) initialState = file.getvalue() # drop the bad table st.getas("table_that_I_do_not_want") file = StringIO.StringIO() st.save(file) finalState = file.getvalue() # we can 'ship' finalState to a client somewhere st2 = metakit.storage() st2.load(StringIO.StringIO(finalState)) # we should only have one table here print "Final state after dumping" metakit.dump(st2.contents()) print # now reload the initialState # this could also be done with st.rollback() print "reloaded initial state" st.load(StringIO.StringIO(initialState)) metakit.dump(st.contents()) tutorial/test.cmk0000755000076500000240000000775010327247041014316 0ustar jcwstaff00000000000000JL耀 JL ?@@@@@@@AA A0A@APA`ApAAAAAAAAAAAAAAAAABBB BBBBB 
B$B(B,B0B4B8BC?C@CACBCCCDCECFCGCHCICJCKCLCMCNCOCPCQCRCSCTCUCVCWCXCYCZC[C\C]C^C_C`CaCbCcCdCeCfCgChCiCjCkClCmCnCoCpCqCrCsCtCuCvCwCxCyCzC{C|C}C~CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCD@DDDD@DDDD@DDDD@DDDD@DDDD@DDDD@DDDD@DDDD@DDD D@ D D D D@ D D D D@ D D D D@ D D D D@ D D DD@DDDD@DDDD@DDDD@DDDD@DDDD@DDDD@DDDD@DDDD@DDDD@DDDD@DDDD@DDDD@DDDD@DDDD@DDDD@DDDD@DDDD@DDD D@ D D D!D@!D!D!D"D@"D"D"D#D@#D#D#D$D@$D$D$D%D@%D%D%D&D@&D&D&D'D@'D'D'D(D@(D(D(D)D@)D)D)D*D@*D*D*D+D@+D+D+D,D@,D,D,D-D@-D-D-D.D@.D.D.D/D@/D/D/D0D@0D0D0D1D@1D1D1D2D@2D2D2D3D@3D3D3D4D@4D4D4D5D@5D5D5D6D@6D6D6D7D@7D7D7D8D@8D8D8D9D@9D9D9D:D@:D:D:D;D@;D;D;DD@>D>D>D?D@?D?D?D@D@@D@D@DAD@ADADADBD@BDBDBDCD@CDCDCDDD@DDDDDDED@EDEDEDFD@FDFDFDGD@GDGDGDHD@HDHDHDID@IDIDIDJD@JDJDJDKD@KDKDKDLD@LDLDLDMD@MDMDMDND@NDNDNDOD@ODODODPD@PDPDPDQD@QDQDQDRD@RDRDRDSD@SDSDSDTD@TDTDTDUD@UDUDUDVD@VDVDVDWD@WDWDWDXD@XDXDXDYD@YDYDYDZD@ZDZDZD[D@[D[D[D\D@\D\D\D]D@]D]D]D^D@^D^D^D_D@_D_D_D`D@`D`D`DaD@aDaDaDbD@bDbDbDcD@cDcDcDdD@dDdDdDeD@eDeDeDfD@fDfDfDgD@gDgDgDhD@hDhDhDiD@iDiDiDjD@jDjDjDkD@kDkDkDlD@lDlDlDmD@mDmDmDnD@nDnDnDoD@oDoDoDpD@pDpDpDqD@qDqDqDrD@rDrDrDsD@sDsDsDtD@tDtDtDuD@uDuDuDvD@vDvDvDwD@wDwDwDxD@xDxDxDyD@yDyDyDtest[v:F]À؀tutorial/wrap.py0000755000076500000240000000144610327247041014162 0ustar jcwstaff00000000000000import metakit storage = metakit.storage() table = [ (10000 ,8.3,7.1,8.3,8.1), (14999,5.5,5.8 ,5.8,6.2), (24999 ,11.3,12.7,12.4,13.5), (34999,11.9,13.2 ,12.4,13.7), (49999,16,2,18.1,16.5 ,17.9), (74999 ,20.7,22.7,21.4,21.4), (99999,11.6,10.9 ,11.4,10.2), (149999,9,6.4,8.6,6.3), (199999,2.6,1.5 ,2.1,1.4), (200000,2.8,1.5,2.0,1.5), ] headers = ['Income Range', 'Percent Population In Illinois', 'Percent Population In Wisconsin', 'Percent Population In Michigan', 'Percent Population In Indiana'] properties = 
[metakit.property('F', h.replace(' ', "_")) for h in headers] # we are using byPos = 1 here since we have the # input as a list view = metakit.wrap(table, properties, 1) metakit.dump(view)