r""" CS 20P n-grams! For the purposes of this assignment, a word shall be defined as a sequence of non-whitespace characters, with the following characters stripped from either end: !"#%&'()*,-./:;?@\_¡§¶·¿ """ __author__ = 'Someone in CS 20P, someone@jeff.cis.cabrillo.edu' def n_grams(text: str, n_gram_len: int, min_count: int = 2) -> dict[int, list[tuple[str]]]: """ Finds and returns all word n-grams of length `n_gram_len` occurring at least `min_count` times in `text`. :param text: the text to analyze :param n_gram_len: the desired length of n-grams (e.g. 2 for 2-grams) :param min_count: the minimum number of times a n-gram must appear in the text to be counted :return a dictionary mapping n-gram occurrence counts to a list of the n-grams occurring that number of times, as a list of n_gram_len-tuples of strings in ascending lexicographic/alphabetical order of the n-gram words. """ pass # TODO def most_frequent_n_grams(text: str, min_len: int = 1, max_len: int = 10, limit: int = 5) -> dict[int, list[tuple[tuple[str], int]]]: """ Returns a dictionary mapping n-gram lengths to a list of the most frequently occurring word n-grams of that length, along with their occurrence counts, for n-grams appearing at least twice. :param text: the text to analyze :param min_len: the minimum n-gram length :param max_len: the maximum n-gram length :param limit: the maximum number of n-grams to display for each length :return a dictionary mapping n-gram lengths to a list of the most frequently occurring n-grams of that length, along with their occurrence counts, as a list of 2-tuples, where each tuple contains a tuple of the words in an n-gram and the n-gram's occurrence count. The list shall be sorted in descending order of occurrence count, with ties broken in ascending lexicographic/alphabetical order of the n-gram words. """ pass # TODO def main(): """ Expects one or two command-line arguments: sys.argv[1]: A length of n-gram (e.g. 2 for bigrams) sys.argv[2] (optional): A minimum occurrence count (2 if unspecified) Then prints, in descending order of occurrence count, lines containing (a) the occurrence count and (b) a comma-separated list of all n-grams with that occurrence count, in ascending alphabetical/lexicographic order. """ pass # TODO if __name__ == '__main__': main()