This assignment requires familiarity with the lecture materials presented in class through week 04.
If you've ever played Scrabble®, you may know that there are official Scrabble® dictionaries containing lists of legal words. We have the equivalent in file /srv/datasets/scrabble-hybrid
(also available via HTTP), which contains 275381 legal Scrabble® words, one word per line. Here are the first five lines:
LARRIGAN KUNE SINGLETON ACHENIUMS LOCAL
Words in this file shall constitute the entirety of legal Scrabble® words for the purposes of this assignment.
Additionally, file /srv/datasets/scrabble-letter-values
describes the value of each letter tile in Scrabble®.
In order to take full advantage of the space and time efficiency on this one, consider using re.finditer()
to create an iterator that will yield all potential scrabble words in a string. A regular expression matching all contiguous sequences of uppercase English letters is simple enough:
[A-Z]+
You might consider combining this with itertools.chain.from_iterable()
to “flatten” the results of iterating a file object:
>>> import itertools, re >>> sum(1 for _ in itertools.chain.from_iterable( ... re.finditer('[A-Z]+', line.upper()) for line in open('/srv/datasets/party.txt'))) 20
Note you could verify that there are 20 “words” in that file using some command-line utilities, and/or view the words:
$ tr '[:lower:]' '[:upper:]' </srv/datasets/party.txt | grep -oP '[A-Z]+' | wc -l 20 $ tr '[:lower:]' '[:upper:]' </srv/datasets/party.txt | grep -oP '[A-Z]+' THE PARTY TOLD YOU TO REJECT THE EVIDENCE OF YOUR EYES AND EARS IT WAS THEIR FINAL MOST ESSENTIAL COMMAND
You shall implement a module named scrabble
containing definitions for various Scrabble®-related functions. Take care to accommodate the following:
python -m doctest scrabble.py # only produces output if any tests fail python -m doctest -v scrabble.py # gives a detailed summary of all passed/failed tests
""" Generator and other functions related to Scrabble® words. """ from io import TextIOBase # for type hints from typing import Iterable, Iterator # for type hints import itertools # suggested for permutations() and chain.from_iterable() import re # suggested for finditer() # You will want to establish some module-scoped variables representing objects with the information # from files /srv/datasets/scrabble-letter-values and /srv/datasets/scrabble-hybrid # so that the various functions below have access to them as necessary. def tokenize_words(file: TextIOBase) -> Iterator[str]: """ Tokenizes the contents of a text-file object (e.g. from open() or sys.stdin) and yields all contiguous sequences of English letters from the file, in uppercase. >>> from pprint import pprint >>> pprint(list(tokenize_words(open('/srv/datasets/party.txt'))), compact=True) ['THE', 'PARTY', 'TOLD', 'YOU', 'TO', 'REJECT', 'THE', 'EVIDENCE', 'OF', 'YOUR', 'EYES', 'AND', 'EARS', 'IT', 'WAS', 'THEIR', 'FINAL', 'MOST', 'ESSENTIAL', 'COMMAND'] >>> pprint(list(tokenize_words(open('/srv/datasets/phonewords-e.161.txt')))) ['ABC', 'DEF', 'GHI', 'JKL', 'MNO', 'PQRS', 'TUV', 'WXYZ'] """ pass # TODO def legal_words(words: Iterable[str]) -> Iterator[str]: """ Selects from an iterable collection of strings only those that are legal Scrabble® words. >>> from pprint import pprint >>> pprint(list(legal_words(tokenize_words(open('/srv/datasets/party.txt')))), compact=True) ['THE', 'PARTY', 'TOLD', 'YOU', 'TO', 'REJECT', 'THE', 'EVIDENCE', 'OF', 'YOUR', 'EYES', 'AND', 'EARS', 'IT', 'WAS', 'THEIR', 'FINAL', 'MOST', 'ESSENTIAL', 'COMMAND'] >>> pprint(list(legal_words(tokenize_words(open('/srv/datasets/phonewords-e.161.txt'))))) ['DEF', 'GHI'] >>> pprint(list(legal_words(tokenize_words(open('/srv/datasets/empty'))))) [] >>> list(legal_words(['all', 'in', 'lowercase'])) [] """ pass # TODO def word_score(word: str) -> int: """ Computes the sum of the tile values for a given word, or 0 if the word is illegal. >>> from pprint import pprint >>> pprint([(w, word_score(w)) for w in tokenize_words(open('/srv/datasets/party.txt'))], \ compact=True) [('THE', 6), ('PARTY', 10), ('TOLD', 5), ('YOU', 6), ('TO', 2), ('REJECT', 15), ('THE', 6), ('EVIDENCE', 14), ('OF', 5), ('YOUR', 7), ('EYES', 7), ('AND', 4), ('EARS', 4), ('IT', 2), ('WAS', 6), ('THEIR', 8), ('FINAL', 8), ('MOST', 6), ('ESSENTIAL', 9), ('COMMAND', 14)] >>> pprint([(w, word_score(w)) \ for w in tokenize_words(open('/srv/datasets/phonewords-e.161.txt'))], \ compact=True) [('ABC', 0), ('DEF', 7), ('GHI', 7), ('JKL', 0), ('MNO', 0), ('PQRS', 0), ('TUV', 0), ('WXYZ', 0)] >>> word_score('lowercase') 0 """ pass # TODO def highest_value_word(words: Iterable[str]) -> str: """ Selects from an iterable collection of strings the highest-valued Scrabble® word, as returned by the word_score() function. If multiple words are maximal, the function returns the first one encountered. If no words are present, raises an exception of some kind. >>> highest_value_word(tokenize_words(open('/srv/datasets/party.txt'))) 'REJECT' >>> highest_value_word(tokenize_words(open('/srv/datasets/phonewords-e.161.txt'))) 'DEF' >>> try: ... highest_value_word(tokenize_words(open('/srv/datasets/empty'))) ... except Exception as error: ... print('error') ... error """ pass # TODO def legal_tile_words(tiles: str) -> list[str]: """ Returns a sorted list of all the legal Scrabble® words that could be formed from the "tiles" represented by the argument string. >>> legal_tile_words('JTQHDEZ') ['DE', 'ED', 'EDH', 'EH', 'ET', 'ETH', 'HE', 'HET', 'JET', 'TE', 'TED', 'THE', 'ZED'] """ pass # TODO if __name__ == '__main__': # Print the total number of legal words on stdin and their total value in points, just for fun import sys (i1, i2) = itertools.tee(legal_words(tokenize_words(sys.stdin))) print(sum(1 for _ in i1), 'legal Scrabble words worth', sum(map(word_score, i2)), 'points')
As submissions are received, this leaderboard will be updated with the top-performing fully/nearly functional solutions, with regard to execution speed.
Rank | Test Time (s) | Memory Usage (kB) | SLOC (lines) | User |
---|
Submit scrabble.py
via turnin.
Feedback Robot
This project has a feedback robot that will run some tests on your submission and provide you with a feedback report via email within roughly one minute.
Please read the feedback carefully.
Due at 23:59:59 on the date listed on the syllabus.
Assignment 04
is worth 60 points.
Possible point values per category: --------------------------------------- Properly implemented functions 60 (split evenly) Possible deductions: Style and practices 10–20% Possible extra credit: Submission via Git 5% ---------------------------------------