DELTA 116 0 338
SVN  t/d K d5]{\subsubsection{Test: #1}
				\noindent\textit{Purpose.} #2
			
				\bigskip
			
				\noindent\textit{Setup.} #3
				
				\bigskip
				
				\noindent\textit{Execution.} #4
				
				\bigskip
				
				\noindent\textit{Evaluation.} #5}

\begin{document}
\svnInfo $Id$ 
\maketitle

\section{Introduction}
% motivate the document, briefly describe the module

\subsection{Testing Framework}
% find/describe framework

\subsection{Simulation}
% stubbed/simulated components, how it will be done

\section{Test Cases}
Test cases are divided into three categories. Requirements-based tests are derived from the component-level requirements listed in \cite{spec}. These tests are exclusively white-box. Specification-based tests cover interfaces described in \cite{arch} and \cite{spec} but not directly related to requirements. Design-based tests are black-box tests that examine the specific functionality presented by the prototype described in \cite{dsgn}.

Each test is given a name, a motivating purpose, and a procedure for its setup, execution, and evaluation to a boolean pass/fail value.

\subsection{Requirements-Based Tests}
Only three component requirements for the scraper are listed in \cite{spec}. It must correctly import assignment data, accept changes to existing data, and accept new data. Thus, three white-box acceptance test cases are defined to establish confidence that these requirements are fulfilled.

Because the distinction between relevant and irrelevant information is a human creation, it must be tested using human input. Thus, the first test case is this suite's only non-automatable test case. It is, however, critical as a validator of the algorithm's correctness. It cannot be run on as regular a basis as the rest of the tests in this document, but it must be run periodically to assess the overall effectiveness of the algorithm.

\test{Intuitive Scraping Correctness}
{Ensure that relevant information is extracted from course Web pages. This is stipulated by the first requirement for the WebImporter component \cite{spec}.}
{Select a current Web page from a higher education institution listing assignments for the course. The page should be written in some structured format such as a list or table rather than prose. Repeat this process ten times to create a set of ten inputs to the test.}
{Using the same functions that would be invoked if the UI were manipulated directly, request that Cue import each selected Web page into the database. (Note that, if the approach suggested by the \texttt{scraper.py} prototype is used, this operation will require human intervention.) Capture the information that Cue initially extracts from the page and present it to a user who has been made familiar with the input pages. The user rates the relevance of the extracted information on a Likert scale from $1$ to $7$. A score of $1$ indicates that no useful information is present; $3$ indicates that about half of the relevant information on the page has been extracted; $7$ indicates that all relevant information was extracted. ``Relevant'' information should be defined for the user as ``information necessary for a full description of any single assignment.''}
{The test passes if and only if at most $1$ input page is given a $1$ rating and at least $5$ pages receive a rating of $5$ or greater.}

\test{Updated Assignments}
{Ensure the correct updating of assignment information as required in the specifications of WebImporter component \cite{spec}.}
{A plausible but contrived Web page should be constructed as the test is written. It should be verified that Cue behaves well when importing the page, extracting all relevant information (a $7$ rating as above). Each data field in the page should be filled with distinct (not necessarily meaningful) text. When executing the test, import the page into Cue's database as described above.}
{Change the first character in each field of the contrived Web page by incrementing its character code. In Cue, invoke a refresh of the database's assignment data; obtain the data extracted from the modified Web page.}
{The test passes if and only if Cue's new database data exactly matches the modified fields in the Web page.}

\test{New Assignments}
{Verify the addition of new assignment information from course Web pages. This behavior is required by WebImporter component requirements \cite{spec}.}
{As in the previous test, a Web page is constructed when the test is written that is shown to behave very well under Cue's importing functionality. Again, each field should be filled with distinct text. The page is imported into Cue's database using the procedure outlined above.}
{Add an entry to the top of the contrived Web page by repeating the markup that delineates assignments in the page. Fill every field in the new assignment with text distinct from other fields in the new assignment and elsewhere in the page. Invoke a refresh of Cue's assignment database. Collect the assignment data extracted from the Web page.}
{The test passes if and only if all old assignment information was preserved across the refresh and the new assignment was added. The new assignment's extracted fields must exactly match those that were added to the Web page.}

\subsection{Specification-Based Tests}
% cite specif
% "specified functionality that was not directly implied by the basic requirements"
% THE TESTS

\subsection{Design-Based Tests}
% cite design (prototype)
% test detailed implementation specifics
	% because a lot of work was put into impl of prototype, assume eventual impl is similar to prot

\test{RE Pattern Generation}
{An important aspect of the scraper prototype described in \cite{dsgn} is the production of a standard, Perl-like regular expression. This test verifies that a set of fields (represented as ordered pairs of character indices) and a chunk of source markup are correctly transformed into a regular expression that matches the chunk, extracting its fields as subpatterns.}
{Generate a random sequence of characters. Also generate a random set of nonoverlapping character ranges in this string.}
{Invoke \texttt{regexFromFields} on the string and fields. Match the resulting regular expression against the randomly generated string.}
{Two criteria must be satisfied in order for the test to pass.
\begin{itemize}
\item The regular expression matching must succeed and match the whole string.
\item For all $n$, the $n$th matched subpattern must exactly equal the substring of the randomly generated string indicated by field (character range) $n$.
\end{itemize}}

\test{Basic Field Identification}
{The most basic field in a Web page is characterized by a string of differences in two chunks of text. This test ensures that this kind of field is correctly identified.}
{Define a sequence of characters not containing \texttt{"}, \texttt{<}, or \texttt{>} (for instance, \texttt{abcdefghijk}); call this $c_1$. Define a string $c_2$ by changing a contiguous sequence of these characters (in our example, $c_2$ is \texttt{abcdeFGHijk}).}
{Pass $c_1$ and $c_2$ to \texttt{findFields}. Collect the resulting list of fields.}
{The test passes if and only if \texttt{findFields} returns exactly one character range that encompasses exactly the set of modified characters. In the case described above, the correct field would be \texttt{(6,8)}.}

\test{Multiple Basic Field Identification}
{Ensure that basic field identification extends to cases in which more than one field is present. (This test may seem redundant. It is, however, useful to identify cases in which a single field can be found but several fields cause the algorithm to fail.)}
{Define a simple string $c_1$ as above. Instead of changing a contiguous set of characters to create $c_2$, change any of the characters. (For instance, if $c_1$ is \texttt{abcdefghijk}, then $c_2$ might be \texttt{aBcdEFghIJK}.)}
{Pass $c_1$ and $c_2$ to \texttt{findFields}. Collect the resulting list of fields.}
{Iterate through the resulting character ranges. The test passes if and only if the fields contain only characters that differ between $c_1$ and $c_2$ and every differing character is contained within a field.}

\test{Insertion Field Identification}
{Ensure that a field is identified when two strings are equal except that one contains additional characters not present in the other.}
{Define $c_1$ as above. Create $c_2$ by copying $c_1$ and inserting a sequence of characters at several positions in the string. (For example, $c_1$ might be \texttt{acgjk} and $c_2$ might be \texttt{abcdefghijk}.)}
{Pass $c_1$ and $c_2$ to \texttt{findFields}. Collect the resulting list of fields.}
{The test passes if and only if \texttt{findFields} returns character ranges of zero length at every index where characters were inserted. (In our example, the correct output would be \texttt{(2,2),(3,3),(4,4)}.)}

\test{Tag-Delimited Field Identification}
{Because many data fields in HTML markup are located between tags, the scraper expands fields it finds to fit entirely within \texttt{>} and \texttt{<} symbols. This test ensures that fields are identified according to this behavior.}
{Define $c_1$ as a segment of HTML markup (for instance, \texttt{abc<b>defgh</b>ijk}). Define $c_2$ as a copy of $c_1$ with one character not located inside a tag modified (in this example, \texttt{abc<b>deFgh</b>ijk}).}
{Pass $c_1$ and $c_2$ to \texttt{findFields}. Collect the resulting list of fields.}
{The test passes if and only if the only returned field is one spanning the longest character range including the modified character but not including a \texttt{>} or \texttt{<} character. (In our example, the correct field would be \texttt{(7,11)}.)}

\test{Quote-Delimited Field Identification}
{Many parameters inside HTML tags, which are delimited by \texttt{"} characters, represent assignment information fields. Ensure that the scraper correctly defines such tag parameters as fields.}
{Define $c_1$ as a segment of HTML markup containing a tag with a parameter (for instance, \texttt{abcd<a href="efgh">ij</a>k}). Define $c_2$ as a permutation of $c_1$ in which one character in an HTML tag parameter is changed (in this case, $c_2$ might be \texttt{abcd<a href="eFgh">ij</a>k}).}
{Pass $c_1$ and $c_2$ to \texttt{findFields}. Collect the resulting list of fields.}
{The test passes if and only if the only returned field is one spanning the modified parameter. (In our example, this would be \texttt{(14,17)}.)}

\begin{thebibliography}{99}
%\bibitem{reqs} La Motte-Mitchell, Andrew, Adrian Sampson, and Stephon Striplin,
%   {\it Cue Requirements}.
%   CS121 Project 2,
%   February 27, 2007.
   
\bibitem{arch} La Motte-Mitchell, Andrew, Adrian Sampson, and Stephon Striplin,
   {\it Cue Architecture}.
   CS121 Project 3,
   April 7, 2007.

\bibitem{dsgn} Sampson, Adrian,
   {\it Cue: Assignment Page Scanning}.
   CS121 Project 3,
   April 7, 2007.
   
\bibitem{spec} Striplin, Stephon,
   {\it Cue: Component Specifications: Importing}.
   CS121 Project 3,
   April 7, 2007.

% cite testing framework resources
\end{thebibliography}

\end{document}ENDREP
id: 20.q.r125/11150
type: file
pred: 20.q.r124/762
count: 8
text: 125 0 11127 11311 513847281fdfb32d62f59180e9a043d7
props: 116 351 29 0 ff5c3c1f7bdb48ba0201950780ae7e31
cpath: /project4/sampson/test_plan.tex
copyroot: 122 /project4/sampson/test_plan.tex

PLAIN
K 15
post-mortem.tex
V 18
file 21.0.r118/510
K 13
test_plan.tex
V 20
file 20.q.r125/11150
END
ENDREP
id: 1z.0.r125/11513
type: dir
pred: 1z.0.r124/1120
count: 9
text: 125 11406 94 94 ea132e894962a9ba56e0fffb7918dc19
cpath: /project4/sampson
copyroot: 0 /

PLAIN
K 15
lamottemitchell
V 18
dir 1w.0.r115/6624
K 7
sampson
V 19
dir 1z.0.r125/11513
END
ENDREP
id: 1v.0.r125/11767
type: dir
pred: 1v.0.r124/1371
count: 11
text: 125 11668 86 86 198e6421269af19b108a156934316107
cpath: /project4
copyroot: 0 /

PLAIN
K 8
project2
V 16
dir x.0.r41/3433
K 8
project3
V 17
dir y.0.r113/5763
K 8
project4
V 19
dir 1v.0.r125/11767
END
ENDREP
id: 0.0.r125/12041
type: dir
pred: 0.0.r124/1642
count: 125
text: 125 11915 113 113 bc7c6ecf6e13cce8986ea3da26f0487c
cpath: /
copyroot: 0 /

20.q.t124-1 modify true false /project4/sampson/test_plan.tex


12041 12182
