index.html 19.4 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 
528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573
<!doctype html>
<html lang="en">
  <head>
    <meta charset="utf-8">
    <meta http-equiv="X-UA-Compatible" content="IE=edge">
    <meta http-equiv="last-modified" content="2017-08-04 00:20:27 +0200">
    <meta name="viewport" content="width=device-width, initial-scale=1">
    <!-- meta "search-domain" used for google site search function google_search() -->
    <meta name="search-domain" value="/swc-releases/2017.08/python-novice-inflammation">
    <link rel="stylesheet" type="text/css" href="../assets/css/bootstrap.css" />
    <link rel="stylesheet" type="text/css" href="../assets/css/bootstrap-theme.css" />
    <link rel="stylesheet" type="text/css" href="../assets/css/lesson.css" />
    
    <link rel="shortcut icon" type="image/x-icon" href="/favicon-swc.ico" />
    
    <!-- HTML5 shim and Respond.js for IE8 support of HTML5 elements and media queries -->
    <!-- WARNING: Respond.js doesn't work if you view the page via file:// -->
    <!--[if lt IE 9]>
	<script src="https://oss.maxcdn.com/html5shiv/3.7.2/html5shiv.min.js"></script>
	<script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
	<![endif]-->
    <title>Programming with Python: Debugging</title>
  </head>
  <body>
    <div class="container">
      
<nav class="navbar navbar-default">
  <div class="container-fluid">
    <div class="navbar-header">
      <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#bs-example-navbar-collapse-1" aria-expanded="false">
        <span class="sr-only">Toggle navigation</span>
        <span class="icon-bar"></span>
        <span class="icon-bar"></span>
        <span class="icon-bar"></span>
      </button>

      
      

      
      <a class="navbar-brand" href="../">Home</a>

    </div>
    <div class="collapse navbar-collapse" id="bs-example-navbar-collapse-1">
      <ul class="nav navbar-nav">

	
        <li><a href="../conduct/">Code of Conduct</a></li>

	
        
        <li><a href="../setup/">Setup</a></li>
        <li class="dropdown">
          <a href="../" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-haspopup="true" aria-expanded="false">Episodes <span class="caret"></span></a>
          <ul class="dropdown-menu">
            
            <li><a href="../01-numpy/">Analyzing Patient Data</a></li>
            
            <li><a href="../02-loop/">Repeating Actions with Loops</a></li>
            
            <li><a href="../03-lists/">Storing Multiple Values in Lists</a></li>
            
            <li><a href="../04-files/">Analyzing Data from Multiple Files</a></li>
            
            <li><a href="../05-cond/">Making Choices</a></li>
            
            <li><a href="../06-func/">Creating Functions</a></li>
            
            <li><a href="../07-errors/">Errors and Exceptions</a></li>
            
            <li><a href="../08-defensive/">Defensive Programming</a></li>
            
            <li><a href="../09-debugging/">Debugging</a></li>
            
            <li><a href="../10-cmdline/">Command-Line Programs</a></li>
            
	    <li role="separator" class="divider"></li>
            <li><a href="../aio/">All in one page (Beta)</a></li>
          </ul>
        </li>
	

	
	
        <li class="dropdown">
          <a href="../" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-haspopup="true" aria-expanded="false">Extras <span class="caret"></span></a>
          <ul class="dropdown-menu">
            <li><a href="../reference/">Reference</a></li>
            
            <li><a href="../about/">About</a></li>
            
            <li><a href="../discuss/">Discussion</a></li>
            
            <li><a href="../figures/">Figures</a></li>
            
            <li><a href="../guide/">Instructor Notes</a></li>
            
          </ul>
        </li>
	

	
        <li><a href="../license/">License</a></li>
	
	<li><a href="/edit/gh-pages/_episodes/09-debugging.md">Improve this page <span class="glyphicon glyphicon-pencil" aria-hidden="true"></span></a></li>
	
      </ul>
      <form class="navbar-form navbar-right" role="search" id="search" onsubmit="google_search(); return false;">
        <div class="form-group">
          <input type="text" id="google-search" placeholder="Search..." aria-label="Google site search">
        </div>
      </form>
    </div>
  </div>
</nav>


<div class="row">
  <div class="col-md-1">
    <h3>
      
      <a href="../08-defensive/"><span class="glyphicon glyphicon-menu-left" aria-hidden="true"></span><span class="sr-only">previous episode</span></a>
      
    </h3>
  </div>
  <div class="col-md-10">
    
    <h3 class="maintitle"><a href="../">Programming with Python</a></h3>
    
  </div>
  <div class="col-md-1">
    <h3>
      
      <a href="../10-cmdline/"><span class="glyphicon glyphicon-menu-right" aria-hidden="true"></span><span class="sr-only">next episode</span></a>
      
    </h3>
  </div>
</div>

<article>
<div class="row">
  <div class="col-md-1">
  </div>
  <div class="col-md-10">
    <h1 class="maintitle">Debugging</h1>
  </div>
  <div class="col-md-1">
  </div>
</div>


<blockquote class="objectives">
  <h2>Overview</h2>

  <div class="row">
    <div class="col-md-3">
      <strong>Teaching:</strong> 30 min
      <br/>
      <strong>Exercises:</strong> 0 min
    </div>
    <div class="col-md-9">
      <strong>Questions</strong>
      <ul>
	
	<li><p>How can I debug my program?</p>
</li>
	
      </ul>
    </div>
  </div>

  <div class="row">
    <div class="col-md-3">
    </div>
    <div class="col-md-9">
      <strong>Objectives</strong>
      <ul>
	
	<li><p>Debug code containing an error systematically.</p>
</li>
	
	<li><p>Identify ways of making code less error-prone and more easily tested.</p>
</li>
	
      </ul>
    </div>
  </div>

</blockquote>

<p>Once testing has uncovered problems,
the next step is to fix them.
Many novices do this by making more-or-less random changes to their code
until it seems to produce the right answer,
but that’s very inefficient
(and the result is usually only correct for the one case they’re testing).
The more experienced a programmer is,
the more systematically they debug,
and most follow some variation on the rules explained below.</p>

<h2 id="know-what-its-supposed-to-do">Know What It’s Supposed to Do</h2>

<p>The first step in debugging something is to
<em>know what it’s supposed to do</em>.
“My program doesn’t work” isn’t good enough:
in order to diagnose and fix problems,
we need to be able to tell correct output from incorrect.
If we can write a test case for the failing case — i.e.,
if we can assert that with <em>these</em> inputs,
the function should produce <em>that</em> result —
then we’re ready to start debugging.
If we can’t,
then we need to figure out how we’re going to know when we’ve fixed things.</p>

<p>But writing test cases for scientific software is frequently harder than
writing test cases for commercial applications,
because if we knew what the output of the scientific code was supposed to be,
we wouldn’t be running the software:
we’d be writing up our results and moving on to the next program.
In practice,
scientists tend to do the following:</p>

<ol>
  <li>
    <p><em>Test with simplified data.</em>
Before doing statistics on a real data set,
we should try calculating statistics for a single record,
for two identical records,
for two records whose values are one step apart,
or for some other case where we can calculate the right answer by hand.</p>
  </li>
  <li>
    <p><em>Test a simplified case.</em>
If our program is supposed to simulate
magnetic eddies in rapidly-rotating blobs of supercooled helium,
our first test should be a blob of helium that isn’t rotating,
and isn’t being subjected to any external electromagnetic fields.
Similarly,
if we’re looking at the effects of climate change on speciation,
our first test should hold temperature, precipitation, and other factors constant.</p>
  </li>
  <li>
    <p><em>Compare to an oracle.</em>
A <a href="../reference/#test-oracle">test oracle</a> is something whose results are trusted,
such as experimental data, an older program, or a human expert.
We use test oracles to determine if our new program produces the correct results.
If we have a test oracle,
we should store its output for particular cases
so that we can compare it with our new results as often as we like
without re-running that program.</p>
  </li>
  <li>
    <p><em>Check conservation laws.</em>
Mass, energy, and other quantities are conserved in physical systems,
so they should be in programs as well.
Similarly,
if we are analyzing patient data,
the number of records should either stay the same or decrease
as we move from one analysis to the next
(since we might throw away outliers or records with missing values).
If “new” patients start appearing out of nowhere as we move through our pipeline,
it’s probably a sign that something is wrong.</p>
  </li>
  <li>
    <p><em>Visualize.</em>
Data analysts frequently use simple visualizations to check both
the science they’re doing
and the correctness of their code
(just as we did in the <a href="../01-numpy/">opening lesson</a> of this tutorial).
This should only be used for debugging as a last resort,
though,
since it’s very hard to compare two visualizations automatically.</p>
  </li>
</ol>

<h2 id="make-it-fail-every-time">Make It Fail Every Time</h2>

<p>We can only debug something when it fails,
so the second step is always to find a test case that
<em>makes it fail every time</em>.
The “every time” part is important because
few things are more frustrating than debugging an intermittent problem:
if we have to call a function a dozen times to get a single failure,
the odds are good that we’ll scroll past the failure when it actually occurs.</p>

<p>As part of this,
it’s always important to check that our code is “plugged in”,
i.e.,
that we’re actually exercising the problem that we think we are.
Every programmer has spent hours chasing a bug,
only to realize that they were actually calling their code on the wrong data set
or with the wrong configuration parameters,
or were using the wrong version of the software entirely.
Mistakes like these are particularly likely to happen when we’re tired,
frustrated,
and up against a deadline,
which is one of the reasons late-night (or overnight) coding sessions
are almost never worthwhile.</p>

<h2 id="make-it-fail-fast">Make It Fail Fast</h2>

<p>If it takes 20 minutes for the bug to surface,
we can only do three experiments an hour.
That doesn’t just mean we’ll get less data in more time:
we’re also more likely to be distracted by other things as we wait for our program to fail,
which means the time we <em>are</em> spending on the problem is less focused.
It’s therefore critical to <em>make it fail fast</em>.</p>

<p>As well as making the program fail fast in time,
we want to make it fail fast in space,
i.e.,
we want to localize the failure to the smallest possible region of code:</p>

<ol>
  <li>
    <p>The smaller the gap between cause and effect,
the easier the connection is to find.
Many programmers therefore use a divide and conquer strategy to find bugs,
i.e.,
if the output of a function is wrong,
they check whether things are OK in the middle,
then concentrate on either the first or second half,
and so on.</p>
  </li>
  <li>
    <p>N things can interact in N<sup>2</sup> different ways,
so every line of code that <em>isn’t</em> run as part of a test
means more than one thing we don’t need to worry about.</p>
  </li>
</ol>

<h2 id="change-one-thing-at-a-time-for-a-reason">Change One Thing at a Time, For a Reason</h2>

<p>Replacing random chunks of code is unlikely to do much good.
(After all,
if you got it wrong the first time,
you’ll probably get it wrong the second and third as well.)
Good programmers therefore
<em>change one thing at a time, for a reason</em>.
They are either trying to gather more information
(“is the bug still there if we change the order of the loops?”)
or test a fix
(“can we make the bug go away by sorting our data before processing it?”).</p>

<p>Every time we make a change,
however small,
we should re-run our tests immediately,
because the more things we change at once,
the harder it is to know what’s responsible for what
(those N<sup>2</sup> interactions again).
And we should re-run <em>all</em> of our tests:
more than half of fixes made to code introduce (or re-introduce) bugs,
so re-running all of our tests tells us whether we have regressed.</p>

<h2 id="keep-track-of-what-youve-done">Keep Track of What You’ve Done</h2>

<p>Good scientists keep track of what they’ve done
so that they can reproduce their work,
and so that they don’t waste time repeating the same experiments
or running ones whose results won’t be interesting.
Similarly,
debugging works best when we
<em>keep track of what we’ve done</em>
and how well it worked.
If we find ourselves asking,
“Did left followed by right with an odd number of lines cause the crash?
Or was it right followed by left?
Or was I using an even number of lines?”
then it’s time to step away from the computer,
take a deep breath,
and start working more systematically.</p>

<p>Records are particularly useful when the time comes to ask for help.
People are more likely to listen to us
when we can explain clearly what we did,
and we’re better able to give them the information they need to be useful.</p>

<blockquote class="callout">
  <h2 id="version-control-revisited">Version Control Revisited</h2>

  <p>Version control is often used to reset software to a known state during debugging,
and to explore recent changes to code that might be responsible for bugs.
In particular,
most version control systems have a <code class="highlighter-rouge">blame</code> command
that will show who last changed particular lines of code…</p>
</blockquote>

<h2 id="be-humble">Be Humble</h2>

<p>And speaking of help:
if we can’t find a bug in 10 minutes,
we should <em>be humble</em> and ask for help.
Just explaining the problem aloud is often useful,
since hearing what we’re thinking helps us spot inconsistencies and hidden assumptions.</p>

<p>Asking for help also helps alleviate confirmation bias.
If we have just spent an hour writing a complicated program,
we want it to work,
so we’re likely to keep telling ourselves why it should,
rather than searching for the reason it doesn’t.
People who aren’t emotionally invested in the code can be more objective,
which is why they’re often able to spot the simple mistakes we have overlooked.</p>

<p>Part of being humble is learning from our mistakes.
Programmers tend to get the same things wrong over and over:
either they don’t understand the language and libraries they’re working with,
or their model of how things work is wrong.
In either case,
taking note of why the error occurred
and checking for it next time
quickly turns into not making the mistake at all.</p>

<p>And that is what makes us most productive in the long run.
As the saying goes,
<em>A week of hard work can sometimes save you an hour of thought</em>.
If we train ourselves to avoid making some kinds of mistakes,
to break our code into modular, testable chunks,
and to turn every assumption (or mistake) into an assertion,
it will actually take us <em>less</em> time to produce working programs,
not more.</p>

<blockquote class="challenge">
  <h2 id="debug-with-a-neighbor">Debug With a Neighbor</h2>

  <p>Take a function that you have written today, and introduce a tricky bug.
Your function should still run, but will give the wrong output.
Switch seats with your neighbor and attempt to debug
the bug that they introduced into their function.
Which of the principles discussed above did you find helpful?</p>
</blockquote>

<blockquote class="challenge">
  <h2 id="not-supposed-to-be-the-same">Not Supposed to be the Same</h2>

  <p>You are assisting a researcher with Python code that computes the
Body Mass Index (BMI) of patients.  The researcher is concerned because
all patients seemingly have unusual and identical BMIs, despite having different
physiques.  BMI is calculated as <strong>weight in kilograms</strong>
divided by the square of <strong>height in metres</strong>.</p>

  <p>Use the debugging principles in this exercise and locate problems
with the code. What suggestions would you give the researcher for
ensuring any later changes they make work correctly?</p>

  <div class="python highlighter-rouge"><pre class="highlight"><code>patients = [[70, 1.8], [80, 1.9], [150, 1.7]]

def calculate_bmi(weight, height):
    return weight / (height ** 2)

for patient in patients:
    weight, height = patients[0]
    bmi = calculate_bmi(height, weight)
    print("Patient's BMI is: %f" % bmi)
</code></pre>
  </div>

  <div class="output highlighter-rouge"><pre class="highlight"><code>Patient's BMI is: 0.000367
Patient's BMI is: 0.000367
Patient's BMI is: 0.000367
</code></pre>
  </div>

  <blockquote class="solution">
    <h2 id="solution">Solution</h2>
    <ul>
      <li>
        <p>The loop is not being utilised correctly. <code class="highlighter-rouge">height</code> and <code class="highlighter-rouge">weight</code> are always
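set to the first patient’s data (<code class="highlighter-rouge">patients[0]</code>) during each iteration of the loop, because the loop variable <code class="highlighter-rouge">patient</code> is never used.</p>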
      </li>
      <li>
        <p>The height/weight variables are reversed in the function call to
<code class="highlighter-rouge">calculate_bmi(...)</code>, the correct BMIs are 21.604938, 22.160665 and 51.903114.</p>
      </li>
    </ul>
  </blockquote>
</blockquote>


<blockquote class="keypoints">
  <h2>Key Points</h2>
  <ul>
    
    <li><p>Know what code is supposed to do <em>before</em> trying to debug it.</p>
</li>
    
    <li><p>Make it fail every time.</p>
</li>
    
    <li><p>Make it fail fast.</p>
</li>
    
    <li><p>Change one thing at a time, and for a reason.</p>
</li>
    
    <li><p>Keep track of what you’ve done.</p>
</li>
    
    <li><p>Be humble.</p>
</li>
    
  </ul>
</blockquote>

</article>

<div class="row">
  <div class="col-md-1">
    <h3>
      
      <a href="../08-defensive/"><span class="glyphicon glyphicon-menu-left" aria-hidden="true"></span><span class="sr-only">previous episode</span></a>
      
    </h3>
  </div>
  <div class="col-md-10">
    
  </div>
  <div class="col-md-1">
    <h3>
      
      <a href="../10-cmdline/"><span class="glyphicon glyphicon-menu-right" aria-hidden="true"></span><span class="sr-only">next episode</span></a>
      
    </h3>
  </div>
</div>


      
      
<footer>
  <div class="row">
    <div class="col-md-6" align="left">
      <h4>
	Copyright &copy; 2016–2017
	
	<a href="https://software-carpentry.org">Software Carpentry Foundation</a>
	
      </h4>
    </div>
    <div class="col-md-6" align="right">
      <h4>
	
	<a href="/edit/gh-pages/_episodes/09-debugging.md">Edit on GitHub</a>
	
	/
	<a href="/blob/gh-pages/CONTRIBUTING.md">Contributing</a>
	/
	<a href="/">Source</a>
	/
	<a href="/blob/gh-pages/CITATION">Cite</a>
	/
	<a href="">Contact</a>
      </h4>
    </div>
  </div>
</footer>

      
    </div>
    
<script src="../assets/js/jquery.min.js"></script>
<script src="../assets/js/bootstrap.min.js"></script>
<script src="../assets/js/lesson.js"></script>
<script>
  // Google Analytics (analytics.js) loader, kept in its original minified form.
  // Invoked with (window, document, 'script', the analytics.js URL, 'ga'), it:
  //   1. stores the name 'ga' in window.GoogleAnalyticsObject;
  //   2. defines window.ga, unless it already exists, as a stub that pushes
  //      each call's arguments onto the window.ga.q array;
  //   3. records the current time as a number in window.ga.l;
  //   4. creates a script element whose src is the analytics.js URL, marks it
  //      async, and inserts it before the first script element in the document.
  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
  m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
  })(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
  // The calls below go through window.ga (the queueing stub above, unless
  // something else defined window.ga first).
  // NOTE(review): per the analytics.js command-queue API, 'create' presumably
  // sets up a tracker for property 'UA-37305346-2' and 'send', 'pageview'
  // reports a page view; confirm against the Google Analytics documentation.
  ga('create', 'UA-37305346-2', 'auto');
  ga('send', 'pageview');
</script>

  </body>
</html>