see instructions
#-----Statement of Authorship----------------------------------------# # # This is an individual assessment item. By submitting this # code I agree that it represents my own work. I am aware of # the University rule that a student must not act in a manner # which constitutes academic dishonesty as stated and explained # in QUT's Manual of Policies and Procedures, Section C/5.3 # "Academic Integrity" and Section E/2.1 "Student Code of Conduct". # # Student no: PUT YOUR STUDENT NUMBER HERE # Student name: PUT YOUR NAME HERE # # NB: Files submitted without a completed copy of this statement # will not be marked. Submitted files will be subjected to # software plagiarism analysis using the MoSS system # (http://theory.stanford.edu/~aiken/moss/). # #--------------------------------------------------------------------# #-----Assignment Description-----------------------------------------# # # Runners-Up # # In this assignment you will combine your knowledge of HTML # mark-up languages with your skills in Python scripting, pattern # matching, and Graphical User Interface design to produce a useful # application that allows the user to access online data. See the # instruction sheet accompanying this file for full details. # #--------------------------------------------------------------------# #-----Imported Functions---------------------------------------------# # # Below are various import statements for helpful functions. You # should be able to complete this assignment using these functions # only. You can import other functions provided they are standard # ones that come with the default Python/IDLE implementation and NOT # functions from modules that need to be downloaded and installed # separately. Note that not all of the imported functions below are # needed to successfully complete this assignment. # The function for accessing a web document given its URL. # (You WILL need to use this function in your solution, # either directly or via our "download" function.)
from urllib.request import urlopen # The function for displaying a web document in the host # operating system's default web browser. We have given # the function a distinct name to distinguish it from the # built-in "open" function for opening local files. # (You WILL need to use this function in your solution.) from webbrowser import open as urldisplay # Import some standard Tkinter functions. (You WILL need to use # some of these functions in your solution.) You may also # import other widgets from the "tkinter" module, provided they # are standard ones and don't need to be downloaded and installed # separately. from tkinter import * from tkinter.scrolledtext import ScrolledText from tkinter.ttk import Progressbar # Functions for finding all occurrences of a pattern # defined via a regular expression, as well as # the "multiline" and "dotall" flags. (You do NOT need to # use these functions in your solution, because the problem # can be solved with the string "find" function, but it will # be difficult to produce a concise and robust solution # without using regular expressions.) from re import findall, finditer, MULTILINE, DOTALL # Import the standard SQLite functions (just in case they're # needed). from sqlite3 import * # #--------------------------------------------------------------------# #--------------------------------------------------------------------# # # A function to download and save a web document. If the # attempted download fails, an error message is written to # the shell window and the special value None is returned. # # Parameters: # * url - The address of the web page you want to download. # * target_filename - Name of the file to be saved (if any). # * filename_extension - Extension for the target file, usually # "html" for an HTML document or "xhtml" for an XML # document. # * save_file - A file is saved only if this is True. WARNING: # The function will silently overwrite the target file # if it already exists! 
# * char_set - The character set used by the web page, which is
#      usually Unicode UTF-8, although some web pages use other
#      character sets.
# * lying - If True the Python function will try to hide its
#      identity from the web server. This can sometimes be used
#      to prevent the server from blocking access to Python
#      programs. However we do NOT encourage using this option
#      as it is both unreliable and unethical!
# * got_the_message - Set this to True once you've absorbed the
#      message above about Internet ethics.
#
def download(url = 'http://www.wikipedia.org/',
             target_filename = 'download',
             filename_extension = 'html',
             save_file = True,
             char_set = 'UTF-8',
             lying = False,
             got_the_message = False):
    """Download a web document and return its contents as a string.

    The document at `url` is opened, read and decoded using `char_set`.
    If `save_file` is true the decoded text is also written to the file
    `target_filename` + '.' + `filename_extension`, silently overwriting
    any existing file of that name.

    If `lying` is true the request carries a 'Mozilla/5.0' User-Agent
    header; a warning is printed unless `got_the_message` is also true.

    Returns the decoded text, or None if the document could not be
    opened, read or decoded (an explanatory message is printed in each
    case). A failure to write the local file is reported by a printed
    message only; the downloaded text is still returned.
    """

    # Import the function for opening online documents and
    # the class for creating requests
    from urllib.request import urlopen, Request

    # Import an exception raised when a web server denies access
    # to a document
    from urllib.error import HTTPError

    # Open the web document for reading
    try:
        if lying:
            # Pretend to be something other than a Python
            # script (NOT RELIABLE OR RECOMMENDED!)
            request = Request(url)
            request.add_header('User-Agent', 'Mozilla/5.0')
            if not got_the_message:
                print("Warning - Request does not reveal client's true identity.")
                print(" This is both unreliable and unethical!")
                print(" Proceed at your own risk!\n")
        else:
            # Behave ethically
            request = url
        web_page = urlopen(request)
    except ValueError:
        print("Download error - Cannot find document at URL '" + url + "'\n")
        return None
    except HTTPError:
        print("Download error - Access denied to document at URL '" + url + "'\n")
        return None
    except Exception as message:
        print("Download error - Something went wrong when trying to download " + \
              "the document at URL '" + url + "'")
        print("Error message was:", message, "\n")
        return None

    # Read the contents as a character string. The "with" statement
    # guarantees the response is closed whether or not the read and
    # decode succeed, so the underlying connection is never leaked.
    try:
        with web_page:
            web_page_contents = web_page.read().decode(char_set)
    except UnicodeDecodeError:
        print("Download error - Unable to decode document from URL '" + \
              url + "' as '" + char_set + "' characters\n")
        return None
    except Exception as message:
        print("Download error - Something went wrong when trying to decode " + \
              "the document from URL '" + url + "'")
        print("Error message was:", message, "\n")
        return None

    # Optionally write the contents to a local text file
    # (overwriting the file if it already exists!)
    if save_file:
        full_filename = target_filename + '.' + filename_extension
        try:
            # The "with" statement closes the file even if the write fails
            with open(full_filename, 'w', encoding = char_set) as text_file:
                text_file.write(web_page_contents)
        except Exception as message:
            # Report the name of the file we actually tried to write
            print("Download error - Unable to write to file '" + \
                  full_filename + "'")
            print("Error message was:", message, "\n")

    # Return the downloaded document to the caller
    return web_page_contents

#
#--------------------------------------------------------------------#



#--------------------------------------------------------------------#
#
# A function to open a local HTML file in your operating
# system's default web browser. (Note that Python's "webbrowser"
# module does not guarantee to open local files, even if you use a
# 'file://..."
address). The file to be opened must be in the same # folder as this module. # # Since this code is platform-dependent we do NOT guarantee that it # will work on all systems. # def open_html_file(file_name): # Import operating system functions from os import system from os.path import isfile # Remove any platform-specific path prefixes from the # filename local_file = file_name[file_name.rfind('/') + 1:] # Unix local_file = local_file[local_file.rfind('\\') + 1:] # DOS # Confirm that the file name has an HTML extension