#! /usr/bin/env python
#
# Produce scatterplots of multidimensional data.
#
# Read input of the form:
#   1.1 2.2 3.3 4.4
#   1.2 2.4 3.1 5.6
#   ...
# where each line contains n_fields numeric fields,
# and allow the user to produce scatterplots of
# Field A versus Field B for any desired A and B.
#
# To do:
#  - Put labels on the axes (really a scatterplot.py issue).
#  - (Maybe) Offer an option to require equal scaling factors for
#    x and y (mostly a scatterplot.py issue).
#  - Make windows respond to resizing (probably a scatterplot.py
#    issue).
#
#  x Display the number of points and the number selected.
#  x Avoid creation of multiple scatterplots with the same name.
#    (It causes trouble during deletion.)
#  x Make buttons' widths equal.
#  x Allow deletion of the selected points.
#  x Do the right thing when the user closes a scatterplot - either
#    by catching some kind of callback, or by recognizing the
#    exception that occurs when we try to replot it.
#  x Allow complementation of the selected set.
#  x Add command-line "demo" option.
#
# Written and placed in the public domain by Peter Pearson.
#

import math
import re
import string
import sys

try:
    import Tkinter                  # Python 2 module name
except ImportError:
    import tkinter as Tkinter       # Python 3 module name

import scatterplot


class Datapoint:
    """One input record: its numeric field values plus selection state."""

    def __init__(self, values, selected=False):
        self.values = values
        self.selected = selected
        # Scratch slot filled in by distance_between_sets().
        self.distance = None

    def distance_from(self, other):
        """Return the Euclidean distance between this point and `other`.

        Both points must have the same number of values.
        """
        assert len(self.values) == len(other.values)
        total = 0
        for a, b in zip(self.values, other.values):
            total = total + (a - b) ** 2
        return math.sqrt(total)


class Scatterplot_tracker:
    """Record a scatterplot together with its id and the two column
    indices (x and y) it was created from."""

    def __init__(self, splot, id, x_index, y_index):
        self.splot = splot
        self.id = id
        self.x_index = x_index
        self.y_index = y_index


class Plot_maker_controls:
    """Make a GUI frame to control the generation of scatterplots.

    After construction:
      self.frame         the Tkinter.Frame holding the controls
      self.x_choice_var  IntVar holding the column chosen for X (starts at 0)
      self.y_choice_var  IntVar holding the column chosen for Y (starts at 1)
    """

    def __init__(self, parent_window):

        def chooser(pw, title, n_choices, choice_variable):
            """Make a frame with a column of radiobuttons."""
            f = Tkinter.Frame(pw, borderwidth=2, relief=Tkinter.RIDGE)
            Tkinter.Label(f, text=title).grid()
            # Use the n_choices argument; the original ignored it and read
            # the module-level n_fields instead.
            for i in range(n_choices):
                Tkinter.Radiobutton(f, text="column " + str(i),
                                    variable=choice_variable,
                                    value=i).grid()
            return f

        plot_maker_frame = Tkinter.Frame(parent_window, borderwidth=2,
                                         relief=Tkinter.RIDGE)
        Tkinter.Label(plot_maker_frame, text="Make a scatterplot") \
            .grid(row=1, column=0, columnspan=2)

        self.x_choice_var = Tkinter.IntVar()
        self.x_choice_var.set(0)
        self.y_choice_var = Tkinter.IntVar()
        self.y_choice_var.set(1)

        # n_fields and makeplot are module-level names defined by the script.
        x_chooser = chooser(plot_maker_frame, "Take X from", n_fields,
                            self.x_choice_var)
        y_chooser = chooser(plot_maker_frame, "Take Y from", n_fields,
                            self.y_choice_var)
        x_chooser.grid(row=2, column=0)
        y_chooser.grid(row=2, column=1)

        Tkinter.Button(plot_maker_frame, text="Make plot",
                       command=makeplot).grid()

        self.frame = plot_maker_frame


######################################################################
#
#       Functions that respond to user actions:

def makeplot():
    """Service the "Make plot" button: Using the specified columns
    of table v for x and y, make a scatterplot."""
    x_choice = plot_maker.x_choice_var.get()
    y_choice = plot_maker.y_choice_var.get()
    xy = [(z.values[x_choice], z.values[y_choice]) for z in v]

    # To ensure unique IDs, we append a counter:
    plot_id = "%d_v_%d" % (y_choice, x_choice)
    if plot_id in id_counters:
        id_counters[plot_id] = id_counters[plot_id] + 1
        plot_id = "%s(%d)" % (plot_id, id_counters[plot_id])
    else:
        id_counters[plot_id] = 1

    s = scatterplot.Scatterplot(xy,
                                parent_window=t,
                                width=350, height=350,
                                id=plot_id)
    tracker = Scatterplot_tracker(s, plot_id, x_choice, y_choice)
    s_list.append(tracker)
    s.set_callback(mouse_click_callback, "mouse-click")
    s.set_callback(window_delete_callback, "window-delete")

    # If this scatterplot is being created late in the game,
    # after the user has selected some points, then we have
    # to replot the selected points:
    selected = [z for z in v if z.selected]
    if len(selected) > 0:
        xy = [(z.values[x_choice], z.values[y_choice]) for z in selected]
        plot_list_selected(tracker, xy)


def unselect_all():
    """Respond to the "unselect all" button."""
    unselect_and_replot(v)
    update_button_disablings()


def recruit_neighbor():
    """Respond to the "Recruit ~1 neighbor" button.
    Associate each unselected point with its distance from
    the nearest selected point. Select all points whose
    distances equal the shortest.
    """
    global latest_recruits
    selected = [vv for vv in v if vv.selected]
    unselected = [vv for vv in v if not vv.selected]

    shortest = distance_between_sets(selected, unselected)
    if shortest is None:
        # One of the two sets is empty, so there is nobody to recruit.
        # (The button is disabled in that state; this guard also avoids
        # comparing numbers against None.)
        latest_recruits = []
    else:
        latest_recruits = [z for z in unselected if z.distance <= shortest]
    for vv in latest_recruits:
        vv.selected = True
    replot_all_scatterplots(s_list, v)
    update_button_disablings()


def recruit_aggressively():
    """Respond to the "Recruit aggressively" button.
    Find the shortest distance between the set of selected
    points and the set of unselected points. Select the
    smallest number of unselected points sufficient to ensure
    that no unselected point is within that distance of
    a selected point.
    """
    global latest_recruits
    latest_recruits = []
    selected = [vv for vv in v if vv.selected]
    unselected = [vv for vv in v if not vv.selected]

    shortest = distance_between_sets(selected, unselected)
    while shortest is not None:
        being_moved = [z for z in unselected if z.distance <= shortest]
        if len(being_moved) < 1:
            break
        latest_recruits.extend(being_moved)
        for vv in being_moved:
            vv.selected = True
            unselected.remove(vv)
        # Re-measure only against the points just moved: every remaining
        # unselected point was already farther than `shortest` from the
        # previously selected ones.
        next_gap = distance_between_sets(being_moved, unselected)
        if next_gap is None or shortest < next_gap:
            break
    replot_all_scatterplots(s_list, v)
    update_button_disablings()


def undo_recruit():
    """Respond to the "Undo recruit" button."""
    global latest_recruits
    if len(latest_recruits) > 0:
        for r in latest_recruits:
            r.selected = False
        replot_all_scatterplots(s_list, v)
        latest_recruits = []
    update_button_disablings()


def print_selected():
    """Respond to the "Print selected points" button."""
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print("Selected points:")
    for vv in v:
        if vv.selected:
            print(vv.values)


def complement_selection():
    """Respond to the "Complement selection" button."""
    for vv in v:
        vv.selected = not vv.selected
    replot_all_scatterplots(s_list, v)
    update_button_disablings()


def delete_selected():
    """Respond to the "Delete selected points" button."""
    global latest_recruits
    global v
    v = [z for z in v if z.selected == False]
    latest_recruits = []
    replot_all_scatterplots(s_list, v)
    update_button_disablings()


def mouse_click_callback(x, y, id):
    """Called when mouse clicks on one of our scatterplots.

    Toggles the selection state of the data point nearest to (x, y) in
    the plot identified by `id`, then replots everything.
    """
    global latest_recruits
    s_index = [tracker.id for tracker in s_list].index(id)
    s = s_list[s_index]
    point_index = closest_point(v, s.x_index, s.y_index, x, y)
    v[point_index].selected = not v[point_index].selected
    if v[point_index].selected:
        latest_recruits = [v[point_index]]
    replot_all_scatterplots(s_list, v)
    update_button_disablings()


def window_delete_callback(id):
    """Called when the user closes one of the scatterplot windows."""
    doomed = [z for z in s_list if z.id == id]
    for d in doomed:
        s_list.remove(d)


######################################################################
#

def distance_between_sets(selected, unselected):
    """Return the shortest distance between an element in "selected"
    and an element in "unselected".
    Side effect: set the "distance" attribute of each element of
    "unselected" to the shortest distance between it and "selected".

    Returns None when either list is empty; in that case every visited
    element of "unselected" gets distance None.
    """
    result = None
    for vv in unselected:
        shortest = None
        for ww in selected:
            d = vv.distance_from(ww)
            if shortest is None or d < shortest:
                shortest = d
        vv.distance = shortest
        if shortest is not None and (result is None or shortest < result):
            result = shortest
    return result


def closest_point(v, x_index, y_index, x, y):
    """Return the index in v of the point nearest to (x, y), comparing
    only the x_index and y_index columns (squared distance)."""
    d = [(x - z.values[x_index]) ** 2 + (y - z.values[y_index]) ** 2
         for z in v]
    return d.index(min(d))


def plot_list_unselected(s, points):
    """Plot `points` on tracker s in the style used for unselected points."""
    s.splot.plot(points, color="black", size=2)


def plot_list_selected(s, points):
    """Plot `points` on tracker s in the style used for selected points."""
    s.splot.plot(points, color="red", size=4)


def replot_all_scatterplots(s_list, v):
    """Update the status line, then clear and redraw every scatterplot
    in s_list from the points in v."""
    selected = []
    unselected = []
    for vv in v:
        if vv.selected:
            selected.append(vv.values)
        else:
            unselected.append(vv.values)

    set_line1_values(len(v), n_fields, len(selected))

    for s in s_list:
        s.splot.clear()
        xy = [(z[s.x_index], z[s.y_index]) for z in unselected]
        plot_list_unselected(s, xy)
        xy = [(z[s.x_index], z[s.y_index]) for z in selected]
        plot_list_selected(s, xy)


def update_button_disablings():
    """Enable or disable buttons in the control panel, as appropriate."""

    def enable_or_disable(button, enabled):
        if enabled:
            w = "normal"
        else:
            w = "disabled"
        button.config(state=w)

    n_selected = len([z for z in v if z.selected])
    n_total = len(v)
    some_selected = n_selected > 0
    some_but_not_all = n_selected > 0 and n_selected < n_total

    enable_or_disable(button_unselect_all, some_selected)
    enable_or_disable(button_recruit_neighbor, some_but_not_all)
    enable_or_disable(button_print_selected, some_selected)
    enable_or_disable(button_recruit_aggressively, some_but_not_all)
    enable_or_disable(button_undo_recruit,
                      n_selected > 0 and len(latest_recruits) > 0)
    enable_or_disable(button_complement_selection, some_selected)
    enable_or_disable(button_delete_selected, some_but_not_all)


def mark_all_unselected(v):
    """Mark all points as "not selected"."""
    for vv in v:
        vv.selected = False


def unselect_and_replot(v):
    """Mark all points as "not selected", then replot."""
    mark_all_unselected(v)
    replot_all_scatterplots(s_list, v)


def set_line1_values(a, b, c):
    """Set the status line: a points, b dimensions, c selected."""
    line_1_var.set("%d points of %d dimensions, %d selected" % (a, b, c))


# Optional sign, then digits with an optional fraction (or a bare fraction),
# then an optional exponent. The original pattern left its top-level
# alternation ungrouped, so the sign applied only to the first alternative
# and the exponent only to the second; "1.5e3 2.0" parsed as just [1.5].
_FLOAT_RE = re.compile(r'\s*([-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?)')


def get_floats(s):
    """Return the list of floats found at the start of string s.

    Numbers are read left to right, each optionally preceded by
    whitespace. Parsing stops at the first text that is not a number,
    and the numbers read up to that point are returned.
    """
    result = []
    pos = 0
    while pos < len(s):
        m = _FLOAT_RE.match(s, pos)
        if m is None:
            return result
        result.append(float(m.group(1)))
        pos = m.end()
    return result


def make_demo_dataset():
    """Return (points, n_values): 600 random 3-value Datapoints for the
    "-demo" option, generated as two noisy arcs."""
    import random

    def noise(a):
        return a * (random.random() - 0.5)

    v = []
    n_values = 3
    for i in range(300):
        w = (random.random() - 0.5) * 1.6 * math.pi
        v.append(Datapoint((math.cos(w) + noise(.1),
                            0.707 * math.sin(w) + noise(.1),
                            0.707 * math.sin(w) + noise(.1))))
        w = (random.random() - 0.5) * 1.6 * math.pi
        v.append(Datapoint((-math.cos(w) + noise(.2),
                            0.707 * math.sin(w) + noise(.2),
                            -0.707 * math.sin(w) + noise(.2))))
    return v, n_values


if __name__ == "__main__":

    demo_mode = False
    if len(sys.argv) > 1:
        if len(sys.argv) == 2 and sys.argv[1] == "-demo":
            v, n_values = make_demo_dataset()
            demo_mode = True
        else:
            # NOTE(review): the original usage text and the start of the
            # input loop below were truncated in the source this was
            # recovered from (everything between a "<" and a ">" was lost).
            # The message wording, the exit, reading from standard input
            # and the whitespace stripping are reconstructions - confirm
            # against an intact copy.
            print("Usage: %s [-demo] < datafile" % sys.argv[0])
            sys.exit(1)

    if not demo_mode:
        v = []
        n_values = None
        for line in sys.stdin:
            stripped = line.strip()
            if len(stripped) > 0:
                values = get_floats(stripped)
                if n_values is None:
                    n_values = len(values)
                elif n_values != len(values):
                    raise RuntimeError(
                        "Number of values changed from %d to %d." %
                        (n_values, len(values)))
                assert n_values > 1
                v.append(Datapoint(values, False))

    assert len(v) > 0, "no data points were read"
    n_fields = len(v[0].values)
    assert n_fields > 1, "each point needs at least two fields"

    id_counters = {}

    t = Tkinter.Tk()
    t.title("Multiscatter control")

    s_list = []    # Keep a list of the scatterplots we've created.
    latest_recruits = []

    n_row = 0

    line_1_var = Tkinter.StringVar()
    set_line1_values(len(v), n_fields, 0)
    Tkinter.Label(t, textvariable=line_1_var) \
        .grid(row=n_row, column=0, columnspan=2)
    n_row = n_row + 1

    Tkinter.Label(t, text="Point coordinates in %d columns." % n_fields) \
        .grid(row=n_row, column=0, columnspan=2)
    n_row = n_row + 1

    plot_maker = Plot_maker_controls(t)
    plot_maker.frame.grid(row=n_row, column=0, columnspan=2)
    n_row = n_row + 1

    button_recruit_neighbor = Tkinter.Button(t, text="Recruit ~1 neighbor",
                                             command=recruit_neighbor,
                                             state="disabled")
    button_recruit_neighbor.grid(sticky="EW", row=n_row, column=0)

    button_print_selected = Tkinter.Button(t, text="Print selected points",
                                           command=print_selected,
                                           state="disabled")
    button_print_selected.grid(sticky="EW", row=n_row, column=1)
    n_row = n_row + 1

    button_recruit_aggressively = \
        Tkinter.Button(t,
                       text="Recruit aggressively",
                       command=recruit_aggressively,
                       state="disabled")
    button_recruit_aggressively.grid(sticky="EW", row=n_row, column=0)

    button_delete_selected = Tkinter.Button(t, text="Delete selected points",
                                            command=delete_selected,
                                            state="disabled")
    button_delete_selected.grid(sticky="EW", row=n_row, column=1)
    n_row = n_row + 1

    button_undo_recruit = Tkinter.Button(t, text="Undo last recruit",
                                         command=undo_recruit,
                                         state="disabled")
    button_undo_recruit.grid(sticky="EW", row=n_row, column=0)
    n_row = n_row + 1

    button_complement_selection = \
        Tkinter.Button(t,
                       text="Complement selection",
                       command=complement_selection,
                       state="disabled")
    button_complement_selection.grid(sticky="EW", row=n_row, column=0)
    n_row = n_row + 1

    button_unselect_all = Tkinter.Button(t, text="Unselect all",
                                         command=unselect_all,
                                         state="disabled")
    button_unselect_all.grid(sticky="EW", row=n_row, column=0)

    Tkinter.Button(t, text="Quit", command=t.quit).grid(
        sticky="EW", row=n_row, column=1)

    t.mainloop()