Dictionaries and Spreadsheets in Standard ML

19 Oct 2022

Standard ML exercises of CSE 425S: “Programming Systems and Languages” at Washington University in St. Louis.

Singly Chained Dictionary
Hashed Dictionary
Spreadsheet
Spreadsheet to Dictionaries

Singly Chained Dictionary

A dictionary data structure is for storing certain information associated with keys. A value is uniquely mapped to a key so that given a key we can find or modify a value. The most basic form of dictionary can be implemented as a singly linked list, assuming that the user does not care about the number of keys need to be managed:

datatype (''k,'v) Record = Empty | Cons of ''k * 'v * (''k, 'v) Record
type (''k, 'v) dictionary = (''k, 'v) Record

fun create() : (''k, 'v) dictionary = Empty

fun get(dict : (''k, 'v) dictionary, key : ''k) : 'v option =
  case dict of
    Empty => NONE
  | Cons (this_key, this_val, next_rec) =>
      if this_key = key
      then SOME this_val
      else get(next_rec, key)

fun put(dict : (''k, 'v) dictionary, key : ''k , value : 'v) : (''k, 'v) dictionary * 'v option =
  let
    val ret = ref NONE
    fun put_helper(d, k, v) =
      case d of
        Empty => Cons (k, v, d)
      | Cons (this_key, this_val, next_rec) =>
          case k = this_key of
            false => 
              Cons (this_key, this_val, put_helper(next_rec, k, v))
          | true => 
              (ret := SOME this_val; Cons (this_key, v, next_rec))
  in
    (put_helper(dict, key, value), !ret)
  end
	
fun remove(dict : (''k, 'v) dictionary, key : ''k) : (''k, 'v) dictionary * 'v option =
  let
    val ret = ref NONE
    fun remove_helper(d, k) =
      case d of
        Empty => Empty
      | Cons (this_key, this_val, next_rec) =>
          case k = this_key of
            false => Cons (this_key, this_val, remove_helper(next_rec, k))
          | true => (ret := SOME this_val; remove_helper(next_rec, k))
  in
    (remove_helper(dict, key), !ret)
  end

fun entries(dict : (''k, 'v) dictionary) : (''k * 'v) list =
  case dict of
    Empty => []
  | Cons (this_key, this_val, next_rec) =>
      (this_key, this_val)::entries(next_rec)

Hashed Dictionary

A more efficient dictionary might organize data into buckets and determine what data go to which bucket with a hash function:

type ''k hash_function = ''k -> int
type (''k, 'v) bucket = (''k * 'v) list
type (''k, 'v) dictionary = (((''k, 'v) bucket) Vector.vector ref * ''k hash_function)

(* Author: Professor Dennis Cosgrove *)
fun positive_remainder(v : int, n : int) : int = 
  let
    val result = v mod n 
  in 
    if result >= 0 
    then result 
    else result + n
end

Here, our dictionary combines a reference to a vector of buckets with a pre-defined hash function. Given an arbitrary key, we first perform hashing upon it and then take the positive remainder of the hashed key divided by the number of buckets as the index into the bucket vector:

fun find_bucket(dict : (''k, 'v) dictionary, key : ''k) : (''k, 'v) bucket =
  let
    val (ref buckets, hash) = dict
    val index = positive_remainder(hash(key), Vector.length(buckets))
  in
    Vector.sub(buckets, index)
  end

To create an empty dictionary:

fun create(bucket_count_request : int, hash : ''k hash_function) : (''k, 'v) dictionary =
  (ref (Vector.tabulate(bucket_count_request, fn(_) => nil)), hash)

The get(), put(), and remove() functions are similar to the singly chained version.

Spreadsheet

A spreadsheet is a two-dimensional data table with the first row acting as headers. We call each table entry as a “cell”. Our implementation is rather simple:

datatype cell = EMPTY | TEXT of string | INTEGER of int
type sheet = cell list list

With a .csv file at hand, we can first convert it into a list of string lists:

structure Csv = struct
(* fun is_separator(c : char) : bool = c = #"/" *)
    
  fun is_new_line(c : char) : bool = c = #"\n"

  fun is_comma(c : char) : bool = c = #","
    
  fun read_csv(csv:string) : string list list =
    List.map (String.fields is_comma) (String.fields is_new_line csv)
end

Then, we can create a spreadsheet by means of parsing each string form the .csv file into a cell:

fun create_sheet(word_lists : string list list) : sheet =
  let
    (* Check if a char is actually an int *)
    fun is_integer(c : char) : bool =
      c = #"0" orelse c = #"1" orelse c = #"2" orelse c = #"3" orelse c = #"4" orelse
      c = #"5" orelse c = #"6" orelse c = #"7" orelse c = #"8" orelse c = #"9"
          
    (* Convert each string in the string list to a cell *)
    fun create_line(word : string list) : cell list =
      case word of
        [] => []
      | ""::rest => EMPTY::create_line(rest)
      | head::rest =>
          (* Check each char in the char list *)
          if List.all is_integer (String.explode head)
          (* Convert a string into an int *)
          then (INTEGER (valOf (Int.fromString head)))::create_line(rest)
          else (TEXT head)::create_line(rest)
  in
    (* Create a cell list list by mapping create_line to 
       each string list in word_lists *)
    List.map create_line word_lists
  end

Below are all the utility methods:

fun row_count(s : sheet) : int =
  foldl (fn (x,acc) => acc + 1) 0 s

fun column_count(s : sheet) : int =
  case s of 
    [] => 0
  | head::_ => foldl (fn (x,acc) => acc + 1) 0 head

fun row_at(s : sheet, row_index : int) : cell list =
  List.nth(s, row_index)

fun cell_in_row_at_column_index(r : cell list, col_index : int) : cell = 
  List.nth(r, col_index)

fun cell_at(s : sheet, row_index : int, col_index : int) : cell = 
  cell_in_row_at_column_index(row_at(s, row_index), col_index)

fun column_at(s : sheet, col_index : int) : cell list =
  case s of
    [] => []
  | head::rest => List.nth(head, col_index)::column_at(rest, col_index)

fun sum_in_cell_list(cells : cell list) : int =
  case cells of
    [] => 0
  | head::rest => 
      case head of
        INTEGER(x) => x + sum_in_cell_list(rest)
      | _ => sum_in_cell_list(rest)

fun sum_in_row(s : sheet, row_index : int) : int =
  sum_in_cell_list(row_at(s, row_index))

fun sum_in_column(s : sheet, column_index : int) : int =
  sum_in_cell_list(column_at(s, column_index))

fun max_in_cell_list(cells : cell list) : int option =
  let
    fun int_list(cells: cell list) : int list =
      case cells of
        [] => []
      | head::rest => 
          case head of
            INTEGER(x) => x::int_list(rest)
          | _ => int_list(rest)
  in 
    case int_list(cells) of 
      [] => NONE
    | _ => SOME (foldl Int.max 0 (int_list(cells)))
  end

fun max_in_row(s : sheet, row_index : int) : int option =
  max_in_cell_list(row_at(s, row_index))

fun max_in_column(s : sheet, column_index : int) : int option =
  max_in_cell_list(column_at(s, column_index))

fun count_if_in_cell_list(cells : cell list, predicate : (cell -> bool)) : int = 
  foldl (fn (c, acc) => if predicate(c) then acc + 1 else acc) 0 cells

fun count_if_in_row(s : sheet, row_index : int, predicate : (cell -> bool)) : int = 
  count_if_in_cell_list(row_at(s, row_index), predicate)

fun count_if_in_column(s : sheet, col_index : int, predicate : (cell -> bool)) : int = 
  count_if_in_cell_list(column_at(s, col_index), predicate)

Spreadsheet to Dictionaries

Given this spreadsheet:

Name	Uniform Number	Birth Year	Games Played	Goals	Assists
Bobby Orr	4	1948	657	270	645
Wayne Gretzky	99	1961	1487	894	1963
Mario Lemieux	66	1965	915	690	1033

The function to_dictionaires_using_headers_as_keys should return a list with 3 single list dictionaries, one for each non-header row. The dictionaries would be filled with entries for each column, using the cell in the header of the column as the key and the cell in the particular row of the column as the value.

[ { TEXT("Name") => TEXT("Bobby Orr"), TEXT("Uniform Number") => INTEGER(4), TEXT("Birth Year") => INTEGER(1948), TEXT("Games Played") => INTEGER(657), TEXT("Goals") => INTEGER(270), TEXT("Assists") => INTEGER(645) }, 
  { TEXT("Name") => TEXT("Wayne Gretzky"), TEXT("Uniform Number") => INTEGER(99), TEXT("Birth Year") => INTEGER(1961), INTEGER("Games Played") => INTEGER(1487), TEXT("Goals") => INTEGER(894), TEXT("Assists") => INTEGER(1963) }, 
  { TEXT("Name") => TEXT("Mario Lemieux"), TEXT("Uniform Number") => INTEGER(66), TEXT("Birth Year") => INTEGER(1965), INTEGER("Games Played") => INTEGER(915),  TEXT("Goals") => INTEGER(690), TEXT("Assists") => INTEGER(1033) } ]

structure SpreadsheetToDictionaries = struct
  open Spreadsheet

  fun spreadsheet_to_dictionaries_using_headers_as_keys(s : sheet) : (cell, cell) SingleChainedDictionary.dictionary list =

    let
      (* returns a cell list, which is the first row of s *)
      val keys = Spreadsheet.row_at(s, 0)

      (* returns a (key, value) pair list list *)
      fun spreadsheet_to_kvs_list(s) =
        let
          fun values l =
            case l of
              [] => []
            | _::rest => rest
        in
          List.map (fn clist => ListPair.zip(keys, clist)) (values s)
        end
      
      (* returns a dictionary given a list of pairs *)
      fun pairs_to_dictionary(ps, dict) =
        case ps of
          [] => dict
        | (k, v)::t =>
            let
              val (new_dict, _) = SingleChainedDictionary.put(dict, k, v)
            in
              pairs_to_dictionary(t, new_dict)
            end
      
      (* returns a dictionary list given a pair list list *)
      fun spreadsheet_to_dict_helper(pslist, dict) =
        List.map (fn ps => pairs_to_dictionary(ps, dict)) pslist
    in
      spreadsheet_to_dict_helper(spreadsheet_to_kvs_list(s), SingleChainedDictionary.create())
    end

end

行健的个人主页

Dictionaries and Spreadsheets in Standard ML

Table of Contents

Singly Chained Dictionary

Hashed Dictionary

Spreadsheet

Spreadsheet to Dictionaries

行健的个人主页

Dictionaries and Spreadsheets in Standard ML

Table of Contents

Singly Chained Dictionary

Hashed Dictionary

Spreadsheet

Spreadsheet to Dictionaries

Related Posts