diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 7d44e3d..ae5a629 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -27,16 +27,17 @@ jobs: strategy: matrix: os: [ubuntu-latest] - python-version: [3.8] + python-version: [3.12] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 # Install dependencies - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v1 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} + - name: Install dependencies run: | pip install -r requirements.txt @@ -46,19 +47,21 @@ jobs: run: | jupyter-book build site - - name: Create deployment artifact - uses: actions/upload-pages-artifact@v1 + # Upload the book's HTML as an artifact + - name: Upload deployment artifact + uses: actions/upload-pages-artifact@v3 with: - path: sitio/_build/html + path: site/_build/html + # Deploy the book's HTML to GitHub Pages deploy: environment: name: github-pages url: ${{ steps.deployment.outputs.page_url }} runs-on: ubuntu-latest needs: build-book - if: github.ref == 'refs/heads/main' && github.repository == 'emiliom/dinosip-python' + if: github.ref == 'refs/heads/main' && github.repository == 'UW-APL-SURP/aplsurp-python' steps: - - name: Deploy to Github Pages + - name: Deploy to GitHub Pages id: deployment - uses: actions/deploy-pages@v1 + uses: actions/deploy-pages@v4 diff --git a/README.md b/README.md index 3cf1512..4ed04b4 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,45 @@ # README -Stub. +**(NONE OF THIS IS ON THE PUBLIC-REPO README. DELETE, ADAPT)** + +- ~~Rename the base folder to aplsurp-python~~ +- ~~Conda env (previously `dinosip-python-web`, now renamed to `aplsurp-python-web`)~~ +- Conda env: I currently just use `environment.yml`. 
But maybe `requirements.txt` will be needed later for CI-based builds on GH +- To build the site: `jb build site` (or `jb clean site` first) +- To preview it: open the file `site/_build/html/index.html` +- To publish: `ghp-import -n -p -f --remote origin site/_build/html` +- Integrating Ethan's notebooks: + - See my local clone of Ethan's repo: `/home/mayorga/Desktop/DEI/APL/2021_DON_APL-DID-NOW/Python-crashcourse/NCAT_MATE_floats_teaching/` + - Use [url encoding](https://meyerweb.com/eric/tools/dencoder/) to transform the names of the notebook files if they have spaces and other unsafe characters +- Say something about notebooks: Jupyter notebooks vs notebooks in Colab +- Weave the individual lessons (from Carpentry & Pythia) into the notebooks, or as additional notes here next to each notebook + + +## Stuff in Spanish about Jupyter Book that I'm going to cull and translate + +## From Hackaton OHW en español + +Con el [Jupyter Book](https://jupyterbook.org) (y muchos sistemas parecidos), uno modifica los archivos "fuente", típicamente en markdown, y luego "genera" los archivos del sitio en sí. Pueden leer sobre esto [aquí](https://jupyterbook.org/en/stable/basics/building/index.html). + +Los archivos fuente (el contenido) se almacenan en el branch principal, `main`, bajo la carpeta [`sitio`](sitio). Los archivos generados del sitio son colocados en el branch `gh-pages`. En la gran mayoría de los casos no es recomendable editar los archivos en `gh-pages` manualmente, directamente. Entonces, hay dos workflows recomendados: +- Se corre (`build`) jupyter book localmente, para confirmar que todo se vea bien. Una vez que esté listo, se empujan los archivos del build al repositorio usando el programa ghp-import, corrido con algo parecido a esto: `ghp-import -n -p -f --remote upstream sitio/_build/html`. Los archivos fuente en el repositorio son actualizados usando idealmente un pull request. Esto es lo que he estado haciendo hasta ahora. 
+- Activando un sistema en GitHub que genera (build) el sitio automáticamente cada vez que se envía un pull request. Este sistema también incluye un preview de los cambios propuestos, que es muy útil. Esta automatización hace futuras modificaciones mucho más sencillas, especialmente cuando se trata de modificaciones de páginas existentes. + +Este mecanismo automático ya está activo. Ahora, al crear un pull request (PR), después de un minuto (más o menos) verán un enlace como este bajo "Deploy Preview for sage-puppy-e64764 ready!": + + +Este enlace los llevará a una versión temporal del sitio donde pueden confirmar los cambios propuestos en el PR. Una vez que le hagan "merge" al PR, el sitio será reconstruido automáticamente en `gh-pages` con los cambios en el PR. Esto toma un par de minutos. + +Pueden ver un ejemplo de esto en un PR reciente, como este: #3 + +Es sencillo editar una página directamente desde el sitio web: en los íconos arriba a la derecha de cada página, presionen el ícono de GitHub y luego seleccionen "suggest edit". Esto los lleva a GitHub, con la interfaz de modificación del markdown. + + +Cuando estén listos a enviar los cambios, por favor seleccionen "Create a new branch for this commit and start a pull request" antes de presionar "Commit changes". Esto nos permite a otros revisar los cambios antes de aprobarlos. + + + + +Hay dos archivos importantes de configuración del sitio: +- [`_config.yml`](sitio/_config.yml). [Configuraciones básicas del sitio](https://jupyterbook.org/en/stable/customize/config.html). Raramente tendremos que modificar esto. +- [`_toc.yml`](sitio/_toc.yml). "Table of Contents". 
[Organización de las páginas en el sitio.](https://jupyterbook.org/en/stable/basics/organize.html) diff --git a/environment.yml b/environment.yml index 364d218..eeb6c51 100644 --- a/environment.yml +++ b/environment.yml @@ -1,13 +1,14 @@ -name: dinosip-python-web +name: aplsurp-python-web channels: - conda-forge - nodefaults dependencies: - - python=3.9 + - python=3.12 - pip - jupyter-book - ipykernel - ghp-import + - folium - matplotlib - numpy - pandas @@ -15,6 +16,5 @@ dependencies: # - geopandas # - xarray # - seaborn - # - folium # - cartopy diff --git a/requirements.txt b/requirements.txt index 93253d9..986fec2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ jupyter-book -attrs<22 -nbclient<0.6 +attrs +nbclient matplotlib numpy diff --git a/site/_config.yml b/site/_config.yml index 0d3f207..39d9f1d 100644 --- a/site/_config.yml +++ b/site/_config.yml @@ -1,13 +1,14 @@ # Book settings # Learn more at https://jupyterbook.org/customize/config.html -title: DINOSIP Python workshop -author: Emilio Mayorga -logo: img/dinosip-logo.png +title: APL SURP Python Course +author: Emilio Mayorga, Ethan Campbell +copyright: "2025" +logo: img/surp_logo_115x168.png # HMM, where do these show up? -announcement: Python tour workshop for DINO SIP +announcement: Python course for APL SURP program description: >- - Python tour workshop for DINO SIP + Python course for APL SURP program # Force re-execution of notebooks on each build. 
# See https://jupyterbook.org/content/execute.html @@ -25,21 +26,24 @@ parse: # Information about where the book exists on the web repository: - url: https://github.com/emiliom/dinosip-python # Online location of your book + url: https://github.com/UW-APL-SURP/aplsurp-python # Online location of your book path_to_book: site # Optional path to your book, relative to the repository root branch: main # Which branch of the repository should be used when creating links (optional) # Add GitHub buttons to your book # See https://jupyterbook.org/customize/config.html#add-a-link-to-your-repository html: - #baseurl: https://emiliom.github.io/nica-ecocomputacion/ - #favicon: imagenes/favicon1.ico - home_page_in_navbar: false + #baseurl: https://UW-APL-SURP.github.io/aplsurp-python/ + #favicon: img/favicon1.ico + home_page_in_navbar: true #extra_navbar: "" use_issues_button: true use_repository_button: true use_edit_page_button: true +launch_buttons: + colab_url: "https://colab.research.google.com" + sphinx: config: language: en diff --git a/site/_toc.yml b/site/_toc.yml index fd8a6c7..db50eb2 100644 --- a/site/_toc.yml +++ b/site/_toc.yml @@ -2,15 +2,28 @@ # Learn more at https://jupyterbook.org/customize/toc.html format: jb-book -root: index - -chapters: -- file: install-run - title: Installation, execution -- file: overview - title: Quick overview -- file: lessons - title: Detailed lessons -- file: advancedexamples - title: Advanced examples -- file: cheatsheets +root: index +parts: +- caption: + chapters: + - file: python + - file: overview +- caption: Main lessons + chapters: + - file: lessons/index + title: Overview + - file: "lessons/APL_SURP_Python_course_Notebook_1.ipynb" + title: Day 1 + - file: "lessons/APL_SURP_Python_course_Notebook_2.ipynb" + title: Day 2 + - file: "lessons/APL_SURP_Python_course_Notebook_3_blank_copy.ipynb" + title: Day 3 +- caption: Other resources + chapters: + - file: install-run + title: Running Python + - file: complementary_lessons + 
title: Complementary lessons + - file: cheatsheets + - file: advancedexamples + title: Advanced examples diff --git a/site/advancedexamples.md b/site/advancedexamples.md index 1425834..7db21ff 100644 --- a/site/advancedexamples.md +++ b/site/advancedexamples.md @@ -4,7 +4,7 @@ [SeaBird](https://www.seabird.com/) CTD instruments are widely used in Oceanography. Reading SeaBird CTD `.cnv` data files is therefore a common task. The files are text based and one could write code from scratch to read and parse them, but then lots of people would be reinventing the wheel, imperfectly. -We will use the existing `ctd` open-source Python package, https://pyoceans.github.io/python-ctd/. We'll go through a notebook that demonstrates its use. I copied the notebook to [notebooks/ctd-quick_intro.ipynb](./notebooks/ctd-quick_intro.ipynb). It can be run by installing the `ctd` package in your conda environment. If at the terminal, you'd do this, where `my_conda_env` is the name of your conda environment: +We will use the existing `ctd` open-source Python package, https://pyoceans.github.io/python-ctd/. We'll go through a [notebook found on that site](https://pyoceans.github.io/python-ctd/quick_intro-output.html#Reading-and-plotting) that demonstrates its use; I copied the notebook to [notebooks/ctd-quick_intro.ipynb](./notebooks/ctd-quick_intro.ipynb). It can be run by installing the `ctd` package in your conda environment. If at the terminal, you'd do this, where `my_conda_env` is the name of your conda environment: ```bash conda activate my_conda_env conda install -c conda-forge ctd @@ -18,10 +18,3 @@ Here's a depth profile image created by the `ctd` example notebook: ```{note} It's not uncommon for different people or groups to create open-source packages with overlapping capabilities. There is at least one other package that reads SeaBird `cnv` files: the `seabird` package, https://seabird.readthedocs.io. 
Often in these cases, each package may have some features that the other one doesn't. ``` - -## microSWIFT data file - -Prospects for reading [microSWIFT](https://apl.uw.edu/project/project.php?id=swift) data files in Python: - -- https://github.com/jacobrdavis -- https://github.com/edwinrainville/microSWIFT-io diff --git a/site/cheatsheets.md b/site/cheatsheets.md index 6a485f3..cdcb3ba 100644 --- a/site/cheatsheets.md +++ b/site/cheatsheets.md @@ -3,7 +3,6 @@ It's hard to remember the details of syntax and function calls without using it actively and regularly. Compact "cheat sheets" can be a convenient resource for looking up specific details and capabilities. Here are a set of Python cheat sheets that should be useful. - General: https://www.pythoncheatsheet.org/ -- General and Matplotlib: https://ehmatthes.github.io/pcc_2e/cheat_sheets/cheat_sheets/ - Numpy: https://www.datacamp.com/cheat-sheet/numpy-cheat-sheet-data-analysis-in-python -- Matplotlib and Pandas: https://python-graph-gallery.com/cheat-sheets/ -- Pandas: https://www.datacamp.com/cheat-sheet/pandas-cheat-sheet-for-data-science-in-python +- Matplotlib: https://matplotlib.org/cheatsheets/ +- Pandas: https://pandas.pydata.org/Pandas_Cheat_Sheet.pdf diff --git a/site/lessons.md b/site/complementary_lessons.md similarity index 52% rename from site/lessons.md rename to site/complementary_lessons.md index fbc38aa..c5b333f 100644 --- a/site/lessons.md +++ b/site/complementary_lessons.md @@ -1,10 +1,10 @@ -# Detailed lessons +# Complementary, in-depth lessons from other groups -There are **many** free Python tutorials online; a quick search will produce a very long list. For example, [Python Basics: Introduction to Python](https://realpython.com/learning-paths/python-basics/) features many detailed lessons from [https://realpython.com](https://realpython.com), which I find to be a great source for Python in-depth learning. 
[Project Pythia Foundations](https://foundations.projectpythia.org/landing-page.html) has a great collection of more advanced tutorials especially geared for the earth sciences, including Oceanography. +There are **many** free Python tutorials online; a quick search will produce a very long list. For example, [Python Basics: Introduction to Python](https://realpython.com/learning-paths/python-basics/) features many detailed lessons from [https://realpython.com](https://realpython.com), which are a great source for Python in-depth learning. [Project Pythia Foundations](https://foundations.projectpythia.org/landing-page.html) has a great collection of more advanced tutorials especially geared for the earth sciences, including Oceanography. -Instead of creating learning materials from scratch, we'll take advantage of [The Carpentries, https://carpentries.org](https://carpentries.org), "a global community teaching foundational computational and data science skills to researchers in academia, industry and government." Carpentries tutorials and lessons are openly accessible. +The [lessons we created for the APL SURP course](lessons/index) cover each topic relatively briefly. Rather than creating new, in-depth learning materials from scratch, we'll take advantage of [The Carpentries](https://carpentries.org), "a global community teaching foundational computational and data science skills to researchers in academia, industry and government." Carpentries tutorials and lessons are openly accessible. -Specifically, **we'll use the [Plotting and Programming in Python](https://swcarpentry.github.io/python-novice-gapminder/) lesson from The Carpentries**: "This lesson is an introduction to programming in Python 3 for people with little or no previous programming experience." It's also referred to as the "gapminder" lesson, based on the dataset it uses. 
+Specifically, **we'll focus on the [Plotting and Programming in Python](https://swcarpentry.github.io/python-novice-gapminder/) lesson from The Carpentries**: "This lesson is an introduction to programming in Python 3 for people with little or no previous programming experience." It's also referred to as the "gapminder" lesson, based on the dataset it uses. ```{note} Another, similar Carpentries lesson that you may find helpful is [Programming with Python](https://swcarpentry.github.io/python-novice-inflammation/): "The best way to learn how to program is to do something useful, so this introduction to Python is built around a common scientific task: data analysis." @@ -12,11 +12,11 @@ Another, similar Carpentries lesson that you may find helpful is [Programming wi ## Getting the Data -The data we will use is taken from the [gapminder](https://en.wikipedia.org/wiki/Gapminder_Foundation) dataset. To obtain it, download and unzip the file [python-novice-gapminder-data.zip](https://swcarpentry.github.io/python-novice-gapminder/files/python-novice-gapminder-data.zip). In order to follow the presented material, you should launch JupyterLab in the folder that contains the unzipped ddadta file. (see [Starting JupyterLab](https://swcarpentry.github.io/python-novice-gapminder/01-run-quit.html#starting-jupyterlab)). +The data used in this subset of Software Carpentry lessons is taken from the [gapminder](https://en.wikipedia.org/wiki/Gapminder_Foundation) dataset. To obtain it, download and unzip the file [python-novice-gapminder-data.zip](https://swcarpentry.github.io/python-novice-gapminder/files/python-novice-gapminder-data.zip). To execute the lesson materials, launch JupyterLab in the folder that contains the unzipped data file. (see [Starting JupyterLab](https://swcarpentry.github.io/python-novice-gapminder/01-run-quit.html#starting-jupyterlab)). ## Lesson episodes -The Plotting and Programming in Python lesson is intended as a workshop that takes a full day. 
We will go through a *subset* of the materials (referred to as "episodes") and will only highlight some specific points within each episode. The goal is to examine in more detail most of the Python and programming elements we saw in the previous "Quick overview". +The *Plotting and Programming in Python* lesson is intended as a workshop that takes a full day. The *subset* of materials (referred to as "episodes") presented here expands on the lessons presented directly in the APL SURP course. ### The basics @@ -29,7 +29,6 @@ The Plotting and Programming in Python lesson is intended as a workshop that tak - [12. For Loops](https://swcarpentry.github.io/python-novice-gapminder/12-for-loops.html) - [13. Conditionals](https://swcarpentry.github.io/python-novice-gapminder/13-conditionals.html) -### -- Break and Q/A ### More advanced topics - [Numpy -- from Project Pythia](https://foundations.projectpythia.org/core/numpy/numpy-basics.html) @@ -40,5 +39,3 @@ The Plotting and Programming in Python lesson is intended as a workshop that tak - [16. 
Writing Functions](https://swcarpentry.github.io/python-novice-gapminder/16-writing-functions.html) The key points from each episode are [summarized here](https://swcarpentry.github.io/python-novice-gapminder/instructor/key-points.html) - -### -- Break and Q/A diff --git a/site/img/Pythia-launch-kernel.png b/site/img/Pythia-launch-kernel.png new file mode 100644 index 0000000..cb359f4 Binary files /dev/null and b/site/img/Pythia-launch-kernel.png differ diff --git a/site/img/surp_logo_115x168.png b/site/img/surp_logo_115x168.png new file mode 100644 index 0000000..afdbd2d Binary files /dev/null and b/site/img/surp_logo_115x168.png differ diff --git a/site/index.md b/site/index.md index bb0c7a4..df0f660 100644 --- a/site/index.md +++ b/site/index.md @@ -1,32 +1,18 @@ -# DINO SIP Python workshop materials +# APL SURP Python Course -A compressed introduction to Python for participants in the [DINO SIP program](https://www.apl.uw.edu/education/dino_sip.php) at the University of Washington Applied Physics Laboratory. +An introduction to [Python in Oceanography](python) for participants in the [APL SURP (Summer Undergraduate Research) Program](https://www.apl.uw.edu/education/dino_sip.php) at the University of Washington [Applied Physics Laboratory](https://apl.uw.edu). This course will provide a foundation to the Python programming language and "ecosystem" specifically, and programming and open-source software more generally. ## Goals -1. Provide *exposure* to the Python programming language and "ecosystem" specifically, and programming and open-source software more generally. +1. Introduce Python and its use in Oceanography through hands-on notebooks, to serve as a foundation for using Python in SURP research projects. 2. Provide a ramp for future, self-guided learning to both strengthen core topics that were covered and explore new topics. 
-With open-source software, we build on the work of others, contribute back to it, and build something new on top in collaboration with others. Rather than creating learning materials from scratch, here we'll rely on existing, open community resources. We can correct, polish and extend these materials with your input! +## Course descriptions -This workshop is intended as a 3-hour workshop, but the source materials provide a longer, more comprehensive experience. +The structured part of the course consists of [three 90-minute weekly lessons](lessons/index) led by APL instructors ([Ethan Campbell](https://github.com/ethan-campbell) and [Emilio Mayorga](https://github.com/emiliom)). The lessons will be in the form of interactive computational notebooks (["Jupyter"](https://docs.jupyter.org/en/latest/what_is_jupyter.html) notebooks) running in [Google Colab](https://colab.google) for convenience. -## Python +After the third week, we will hold weekly "office hours" for anyone to drop in and ask anything about Python or related topics. -[Python](https://www.python.org/) is a free, open-source, general-purpose and high-level programming language. It's relatively easy to learn, and the syntax facilitates writing new code fairly quickly. In the last few years it has become one of the most widely used general programming languages (see [here](https://pypl.github.io/PYPL.html) and [here](https://www.tiobe.com/tiobe-index/)) and among the [most popular languages for data science applications](https://r4stats.com/articles/popularity/). It's also widely used in the scientific community in general, and in Oceanography specifically (see [here](https://foundations.projectpythia.org/foundations/why-python.html) and [here](https://jose.theoj.org/papers/10.21105/jose.00037)). +This site also provides other materials for continued and deeper learning; see the *Other resources* section on the left. 
The content of this site, including the Jupyter notebooks used in the lessons, is open-source and available on GitHub at https://github.com/UW-APL-SURP/aplsurp-python. -It runs on many operating systems (Windows, MacOS, etc), and on computers from the smallest to the largest. It's often used in introductory programming classes, especially outside Computer Science majors. - -An an **open-source** language, it is freely available to anyone for any purpose, and both the core language (Python proper) and a very large "ecosystem" of packages (libraries) engage a wide range of contributors. But this openness sometimes leads to a disorienting set of choices, including the Python version, the sources and software used to install Python and write code in Python, and specialized packages used to accomplish specific tasks. Compared to, say, Matlab, the entry points can be more confusing. But there are tons of helpful resources and communities online! - -## Installation and execution - -[A brief overview](./install-run.md) of options for installing Python and developing and executing Python code. It points to specific instructions for installing the Anaconda distribution and the JupyterLab user interface, which we will use. - -## Lessons - -- [Quick, live overview](./overview.md). We'll run quickly through many of the core features of the language, using a Jupyter notebook in a live session online. -- [Detailed lessons](./lessons.md) -- [Advanced examples](./advancedexamples.md). We'll use a Python package to open and explore CTD data, including the data you collected last week! - -Plus [Python cheat sheets.](./cheatsheets.md) +Python is an open-source programming language. With open-source software, we are able to build on the work of others, contribute back to it, and build something new on top in collaboration with others. 
diff --git a/site/install-run.md b/site/install-run.md new file mode 100644 index 0000000..7f76388 --- /dev/null +++ b/site/install-run.md @@ -0,0 +1,45 @@ +# Running Python + +We use [Google Colab](https://colab.google) to run the instructor-led Python lessons for the first three weeks. Google Colab makes it really convenient for others to run [Jupyter](https://docs.jupyter.org/en/latest/what_is_jupyter.html) notebooks. There's no need to install or download anything in your computer. For many, Colab may also be all you need to use Python effectively for your SURP research project needs. However, some may find it necessary or convenient to set up a Python environment in your own computer or a project team server. The instructions below will help you get set up, and also provide a broader background. + + +## Local installation and execution + +### Anaconda distribution + +We recommend the use of the [Anaconda](https://www.anaconda.com/download) Python distribution to simplify the Python installation and execution of Python code files and Jupyter notebooks (the Python lessons notebooks that run on Google Colab are also Jupyter notebooks). For notebooks, we recommend [Jupyter tools and computational notebooks](https://docs.jupyter.org) (see [here, too](https://foundations.projectpythia.org/foundations/getting-started-jupyter.html)), interacting with Python code via the [JupyterLab application](https://swcarpentry.github.io/python-novice-gapminder/01-run-quit.html#starting-jupyterlab) (see also [here](https://foundations.projectpythia.org/foundations/jupyterlab.html) for a more detailed guide). + +```{admonition} Installing Anaconda on your computer +[Follow these instructions](https://carpentries.github.io/workshop-template/#python). The videos are somewhat dated, so the screenshots may look a bit different from what you will actually see during installation. But the steps are the same. 
[Here is a more up-to-date, short installation video](https://www.youtube.com/watch?v=5_hB2TNPgRc), for Windows only. +``` + +### Running JupyterLab + +Use the [Anaconda Navigator](https://www.anaconda.com/docs/tools/anaconda-navigator/main) to launch JupyterLab. It's included in the Anaconda installation, together with JupyterLab itself and many other applications and packages. Working with notebooks in JupyterLab will be familiar after you've used Google Colab. + +## The wider world of Python installation and development + +There are **many** options to install Python and software to develop and execute Python code! The second paragraph in [this page](https://swcarpentry.github.io/python-novice-gapminder/01-run-quit.html) is a very brief overview. + +Yes, this can be pretty confusing! + +Python itself can be installed directly from the official Python website, https://python.org. The installer includes Python, the ability to execute code at the "terminal", and a very simple "Integrated Development Environment" (IDE) called IDLE. + +An initial choice one has to make is the Python **version**. Typically it's best to avoid the very latest versions, and choose a slightly older but more stable and more widely supported version. Today (July 2025), that's probably Python 3.12. + +Anaconda is based on the open-source [conda](https://conda.org/) package management system. `conda` itself can be installed through different distributions, with different pros and cons. Other important distributions are [miniconda](https://www.anaconda.com/docs/getting-started/miniconda/main), [miniforge](https://conda-forge.org/download/) and [mamba](https://mamba.readthedocs.io). The *source* of the `conda` packages also varies; the most widely used one in the scientific and other communities is [conda-forge](https://conda-forge.org/). 
+ +As everything released by the Jupyter project is open-source, the ability to run Jupyter notebooks is also implemented in other software not developed directly by the Jupyter project. For example, [Visual Studio Code (VSCode)](https://code.visualstudio.com/), [Spyder](https://www.spyder-ide.org/) and [PyCharm](https://www.jetbrains.com/pycharm/) (PyCharm is not open-source but is freely available for education). + +For complex software development tasks, IDEs are practically necessary. These include VSCode, Spyder and PyCharm. Typically you will write Python code directly (`*.py` files) rather than Jupyter notebooks; or some combination. Out of these IDEs, Spyder probably resembles the Matlab interface the most. VSCode has the advantage of also being a powerful, generic text editor. IDEs provide lots of conveniences that make coding more efficient. + +You can also write code (not Jupyter notebooks) directly, "by hand", using a simple text editor; then run the code at the terminal. This is helpful in a pinch but pretty inefficient. + +## Simple, no-fuss options for continued learning + +If you'd like to polish, practice and extend your understanding of core Python language features, you can use Google Colab or one of these two no-fuss options: + +- [Thonny](https://thonny.org), a straightforward desktop software that also installs Python for its own use. Simple installation, few distractions, and helpful tools. +- Online Python execution sites. There are many options, but this one looks good: https://www.online-python.com + +These two are great resources for initial learning, but not for Python code development for actual, more complex work. 
diff --git a/site/lessons/APL_SURP_Python_course_Notebook_1.ipynb b/site/lessons/APL_SURP_Python_course_Notebook_1.ipynb new file mode 100644 index 0000000..944027a --- /dev/null +++ b/site/lessons/APL_SURP_Python_course_Notebook_1.ipynb @@ -0,0 +1,1824 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [], + "collapsed_sections": [ + "UszPR4dNY8sW", + "J2FPr4LV86OO", + "aUDER1Q4Htyy", + "N6kV_Lat6DBG" + ] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# **APL SURP Python course** - Notebook 1 (completed version)\n", + "### ***Introduction to Python, variables, lists, 1-D arrays, and debugging***\n", + "\n", + "*Created for the University of Washington Applied Physics Laboratory's Summer Undergraduate Research Program (SURP) 2025.*" + ], + "metadata": { + "id": "OxvLAQ1SWpeR" + } + }, + { + "cell_type": "markdown", + "source": [ + "For additional resources on Python basics, you can consult the following resources on the APL-SURP Python course website:\n", + "* Tutorials on Python fundamentals: https://uw-apl-surp.github.io/aplsurp-python/overview.html\n", + "* Complementary lessons on specific Python topics: https://uw-apl-surp.github.io/aplsurp-python/complementary_lessons.html" + ], + "metadata": { + "id": "17Wn8Uio3ea6" + } + }, + { + "cell_type": "markdown", + "source": [ + "## Part 1: Python and notebooks" + ], + "metadata": { + "id": "UszPR4dNY8sW" + } + }, + { + "cell_type": "markdown", + "source": [ + "**Computer code** allows us to work with data, create visualizations, and create repeatable scientific workflows. It is an integral part of the modern scientific method!\n", + "\n", + "Every programming language has a specific **syntax**. 
In English as well as programming languages, syntax describes valid combinations of symbols and words:\n", + "* Syntactically invalid: \"boy dog cat\"\n", + "* Syntactically valid: \"boy hugs cat\"\n", + "* Syntactically valid (but **semantically** invalid): \"cat hugs boy\"\n", + "\n", + "**Semantics** refer to whether a phrase has meaning. It's up to us to write computer code that has scientific meaning and is useful. The computer will allow us to write code that is syntactically valid but semantically – or scientifically – incorrect!\n", + "\n", + "\n", + "---" + ], + "metadata": { + "id": "stif3BkqXGuD" + } + }, + { + "cell_type": "markdown", + "source": [ + "" + ], + "metadata": { + "id": "Djp9pEel9qA6" + } + }, + { + "cell_type": "markdown", + "source": [ + "(*Image source: [tiobe.com](https://www.tiobe.com/tiobe-index/)*)\n", + "\n", + "As of 2025, Python is likely the most widely-used programming language in the world, and its popularity continues to rise above other languages." + ], + "metadata": { + "id": "E752-6589-dV" + } + }, + { + "cell_type": "markdown", + "source": [ + "" + ], + "metadata": { + "id": "lXb-zwLyIJ_i" + } + }, + { + "cell_type": "markdown", + "source": [ + "No programming language is perfect. 
As the inventor of C++ once said, *“There are only two kinds of programming languages: the ones people complain about and the ones nobody uses.”*\n", + "\n", + "However, there are many reasons that we use Python instead of other programming languages, like MATLAB, Java, or C:\n", + "- It's free!\n", + "- It reads a bit like written English, so it's easier to write and understand\n", + "- It's old, so it's very stable (Python was created in 1991)\n", + "- It can do almost anything\n", + "- It's incredibly popular inside and outside of science (so it could help you land a job)\n", + "- It's open source, which means anyone can help to improve it\n", + "\n", + "For more discussion of why Python is valuable and why its open source origin is important, see the APL-SURP Python course website: https://uw-apl-surp.github.io/aplsurp-python/python.html.\n", + "\n", + "***Question: How many of you have heard of Python before this course? Who has written code in Python before? Keeping your hands up, who has written code in any language before?***" + ], + "metadata": { + "id": "uKH9xnsPIJSB" + } + }, + { + "cell_type": "markdown", + "source": [ + "" + ], + "metadata": { + "id": "LmkHnoGLJcAi" + } + }, + { + "cell_type": "markdown", + "source": [ + "This web page is called a **Jupyter notebook**. It lets us write and run computer code, and the results get displayed and saved alongside the code.\n", + "\n", + "In a Jupyter notebook, you can mix and match **code cells** and **text cells**. Text cells don't get interpreted as Python code. You can double-click on a text cell to edit it.\n", + "\n", + "***Try creating a new text cell using the menu button (+TEXT), write a message in it, then delete the cell.***\n", + "\n", + "Just like in a Google Doc, you can add comments to a cell using the button at the top right of the cell. 
***Try this!***" + ], + "metadata": { + "id": "YkJewcBFh3eR" + } + }, + { + "cell_type": "markdown", + "source": [ + "" + ], + "metadata": { + "id": "TzrsyTR8KlFl" + } + }, + { + "cell_type": "markdown", + "source": [ + "***Question: When we run Python code in this notebook, where is the code actually being run?***\n", + "\n", + "**Google Colab** (short for Colaboratory) is an online platform that lets you create and execute Jupyter notebooks from your web browser, like Chrome or Safari. Colab is free to use.\n", + "\n", + "When you create a new Colab notebook, it is stored in your Google Drive account. You can share Colab notebooks just like you would share a Google Doc using the \"Share\" menu at the top right of the page.\n", + "\n", + "You can download this notebook to your computer using the File menu -> Download. The file extension will be `.ipynb`. You can run Jupyter notebooks on your computer (what we would call **\"running locally\"**) if you have Python and key packages installed. If you wish to install and run Python locally, the APL-SURP Python course website offers information on doing so: https://uw-apl-surp.github.io/aplsurp-python/install-run.html.\n", + "\n", + "Sometimes it makes more sense to create a **Python script** instead of a Jupyter notebook. Scripts are plain code files that run from top to bottom, and they don't save the output. Their file extension is `.py`." + ], + "metadata": { + "id": "trglTsK2KqlH" + } + }, + { + "cell_type": "markdown", + "source": [ + "---\n", + "\n", + "### Getting started with coding\n", + "\n", + "First, we always have to load **packages** into the notebook using the `import` command! Packages contain additional **functions** that allow us to get more stuff done.\n", + "\n", + "To run a coding cell, you can click the \"play\" button or type `Shift`-`Enter` (PC) or `Shift`-`Return` (Mac) on your keyboard. 
***Try this with the cell below:***" + ], + "metadata": { + "id": "db2A18q6WXtQ" + } + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "-9O6SthNqtT8" + }, + "outputs": [], + "source": [ + "import numpy as np # NumPy is an array and math library\n", + "import matplotlib.pyplot as plt # Matplotlib is a visualization (plotting) library\n", + "import pandas as pd # Pandas lets us work with spreadsheet (.csv) data\n", + "from datetime import datetime, timedelta # Datetime helps us work with dates and times" + ] + }, + { + "cell_type": "markdown", + "source": [ + "When we write `import numpy as np`, we are telling Python: \"import the package NumPy and we will access it using the abbreviation `np` from here onwards.\" You could technically use any abbreviation, but `np` is standard for NumPy." + ], + "metadata": { + "id": "vmaVXRWMAxMO" + } + }, + { + "cell_type": "code", + "source": [ + "# This is a comment. Nothing happens when this cell is executed!" + ], + "metadata": { + "id": "9SMheEZQO57M" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "Often we'd like to add notes to our code. You can do this using **Python comments** (which are not the same as Google Colab comments).\n", + "\n", + "Python comments are notated using a `#` (hash) symbol. Everything after the `#` is ignored and not treated like code.\n", + "\n", + "***Can you add a Python comment to the code cell above, then execute the cell? What happens?***" + ], + "metadata": { + "id": "_b8DR4MoAlCW" + } + }, + { + "cell_type": "markdown", + "source": [ + "## Part 2: Variables, math, and string formatting" + ], + "metadata": { + "id": "J2FPr4LV86OO" + } + }, + { + "cell_type": "markdown", + "source": [ + "We can use Python as a calculator. 
Run the cell below:" + ], + "metadata": { + "id": "ESXwBOoGW8qS" + } + }, + { + "cell_type": "code", + "source": [ + "3 + 9" + ], + "metadata": { + "id": "5LUJIIQ6XWi0", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "14b52a83-0621-43dd-d6cc-1a7fe2041cfd" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "12" + ] + }, + "metadata": {}, + "execution_count": 70 + } + ] + }, + { + "cell_type": "code", + "source": [ + "2 + 5" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "_o7WAAKpDxIP", + "outputId": "4e8ae172-92d0-4e43-b538-668650c782d1" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "7" + ] + }, + "metadata": {}, + "execution_count": 71 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "Note that parentheses can be used to change the order of operations:" + ], + "metadata": { + "id": "SOh05mH6EpLI" + } + }, + { + "cell_type": "code", + "source": [ + "1 + 2 * 3 + 4" + ], + "metadata": { + "id": "P06zR16eEi3O", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "0cee5bad-f458-4c89-94f2-65d3d1049fd5" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "11" + ] + }, + "metadata": {}, + "execution_count": 72 + } + ] + }, + { + "cell_type": "code", + "source": [ + "(1 + 2) * (3 + 4)" + ], + "metadata": { + "id": "tLOCMyTBEvck", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "88efba69-4a72-40e5-fbaa-5a9df4e0e96c" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "21" + ] + }, + "metadata": {}, + "execution_count": 73 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "If Python doesn't recognize the code, it will give an **error**.\n", + "\n", + "***Run the code below. 
What helpful information does the resulting error message include?***" + ], + "metadata": { + "id": "s5D32op-iCGK" + } + }, + { + "cell_type": "code", + "source": [ + "# Uncomment the line below to run:\n", + "# 3 + hello" + ], + "metadata": { + "id": "uCPfRriciBXp" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "***Can you figure out how to multiply and divide numbers? Try doing some math yourself below.***" + ], + "metadata": { + "id": "P3n99f5yXZPs" + } + }, + { + "cell_type": "code", + "source": [ + "10 * 2" + ], + "metadata": { + "id": "PmQitlaNXfKF", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "33b2b16a-cb44-4ce3-b13a-631e0944f1ee" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "20" + ] + }, + "metadata": {}, + "execution_count": 75 + } + ] + }, + { + "cell_type": "code", + "source": [ + "10 / 2" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "e609gBCwRNrc", + "outputId": "7ca5263a-77f6-4e70-9985-ab34265873d6" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "5.0" + ] + }, + "metadata": {}, + "execution_count": 76 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "Usually, Python needs to be told when to \"print\" something to the screen. 
For this, we use the **`print()` function**:" + ], + "metadata": { + "id": "B3kVB6JVXksP" + } + }, + { + "cell_type": "code", + "source": [ + "print(\"Hello world!\")" + ], + "metadata": { + "id": "PQeI0aJbXstQ", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "e1b09072-24fc-41cb-dbaf-74977ac78524" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Hello world!\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "Notice how the function requires a set of parentheses, `( )`, which follow immediately after the name of the function (`print`).\n", + "\n", + "***Try writing code to print a different message:***" + ], + "metadata": { + "id": "gnjpRXeOiZsz" + } + }, + { + "cell_type": "code", + "source": [ + "print('SURP is amazing!')" + ], + "metadata": { + "id": "WGWdyQjEibZE", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "fe713dca-2c28-47fb-98d9-9e5bdcbf23ce" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "SURP is amazing!\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "Note how comments are used in two ways below, both to describe a section of code and to annotate a specific line:" + ], + "metadata": { + "id": "qetc0zzL13rG" + } + }, + { + "cell_type": "code", + "source": [ + "# This is a section comment\n", + "print('This is not a comment')\n", + "print('This is also not a comment') # This is a line comment" + ], + "metadata": { + "id": "CWCYvrgX2IX9", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "88ca5491-d090-4f2c-e93f-da0503dad946" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "This is not a comment\n", + "This is also not a comment\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "In Python, we use **variables** to store information. 
Variables can be numbers (**integers** or **floats**), combinations of characters (called **strings**), a **boolean** (which is either True or False), or other variables that are generally called \"**objects**\".\n", + "\n", + "To save (or **\"assign\"**) a variable, we use the equal sign (`=`). You can name your variable anything descriptive, as long as it's one word! Note that underscore (`_`) can be used to join words in a variable name." + ], + "metadata": { + "id": "2yuXcWy5XxWX" + } + }, + { + "cell_type": "code", + "source": [ + "a = -5 # This variable is an \"integer\" because it is a whole number (a number without a decimal point)\n", + "almost_ten = 9.9 # This variable is a \"float\" because it is a floating point number (a number with a decimal point)\n", + "scientific = 2e3 # This variable is also a float, and is written in scientific notation: 2.0 x 10^3 = 2000\n", + "\n", + "topic = 'OCEAN' # This variable is called a string\n", + "topic_2 = \"ATMOSPHERE\" # You can also specify strings using double quotation marks\n", + "\n", + "this_is_a_boolean = True # This variable is a boolean" + ], + "metadata": { + "id": "OGHwCzCiYOiv" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "print(a)" + ], + "metadata": { + "id": "p-6FzKFwYNJ9", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "17d34292-2552-4b80-c841-ceb4287911b5" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "-5\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "print(almost_ten)" + ], + "metadata": { + "id": "47-LhFOaYQ0m", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "5a6449da-2af8-4e4e-fa4e-82685a9fe03c" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "9.9\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "print(scientific)" + ], + "metadata": { + 
"id": "XumLM8cKGAiC", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "b63ce9af-5b49-4a85-f1e0-b2f6a6ba8176" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "2000.0\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "print(topic)\n", + "print(topic_2)" + ], + "metadata": { + "id": "qD3PPGarYXdF", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "b434444f-881a-4882-d878-e2ea503229b3" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "OCEAN\n", + "ATMOSPHERE\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "print(this_is_a_boolean)" + ], + "metadata": { + "id": "Hegia9C2GdUw", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "09d370d2-f31a-4c65-adaf-984d1cf1ccca" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "True\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "You can do math or other operations on the same line where you create a variable!" 
+ ], + "metadata": { + "id": "0rlqnIOZ9NlL" + } + }, + { + "cell_type": "code", + "source": [ + "result = 2025 - 1915\n", + "print(result)" + ], + "metadata": { + "id": "PuWeV09m9VA_", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "a991e1e3-ee6d-4de5-f8b7-82dcd83d264a" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "110\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "You can also change a variable using this compact notation:\n", + "* `a += b` is the same as `a = a + b`\n", + "* `a -= b` is the same as `a = a - b`\n", + "* `a *= b` is the same as `a = a * b`" + ], + "metadata": { + "id": "4KD9Yq3lFHH2" + } + }, + { + "cell_type": "code", + "source": [ + "result += 50\n", + "print(result)" + ], + "metadata": { + "id": "zPlOmwLpFcu9", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "4f7e796c-6eb5-41a4-e266-d3f4ef5388d0" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "160\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "***Try the following:***\n", + "1. ***Search on Google for the formula to convert Fahrenheit to Celsius.***\n", + "2. ***Save a variable with the current Seattle temperature in Fahrenheit (feel free to guess, or look it up).***\n", + "3. ***In one line, create a new variable with that temperature converted into Celsius using the math formula.***\n", + "4. 
***Print the result!***" + ], + "metadata": { + "id": "OBmYHJ93MZO1" + } + }, + { + "cell_type": "code", + "source": [ + "# Write your code here:\n", + "temp = 62 # Fahrenheit\n", + "new_temp = (temp - 32) * (5/9) # Celsius\n", + "print(new_temp)" + ], + "metadata": { + "id": "oV3vRYprMont", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "23fe7257-107d-4f61-eaa2-51ed1a5cefac" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "16.666666666666668\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "Note that Python treats booleans (`True` and `False`) like the integers 1 and 0, respectively. ***This means you can do math with booleans. What will the code produce below, and why?***" + ], + "metadata": { + "id": "aIjuN0miGoUt" + } + }, + { + "cell_type": "code", + "source": [ + "print((False * 5) + (True * 3))" + ], + "metadata": { + "id": "pIt2B0QQG6TX", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "866b6cf6-060a-4012-b5df-3136fcc53065" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "3\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "***What happens when you add two strings together? Try it below.***" + ], + "metadata": { + "id": "1rp4Jnh27cgX" + } + }, + { + "cell_type": "code", + "source": [ + "# Write your code here:\n", + "print('APL' + ' ' + 'SURP')" + ], + "metadata": { + "id": "K1CmVOqj7hYP", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "4ccd359d-479f-4f7a-906e-fbb1a787b40e" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "APL SURP\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "You can also create strings that include variables! Here are two ways:\n", + "\n", + "The first way is called an **f-string**, for \"formatted string\". 
To create one, start the string with the letter `f` and embed the variables inside using curly brackets (`{...}`):" + ], + "metadata": { + "id": "Fn9m6TiyTpoy" + } + }, + { + "cell_type": "code", + "source": [ + "example_f_str = f'The value of almost_ten is {almost_ten}'\n", + "print(example_f_str)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "gnMVlfr1TomG", + "outputId": "5a1ec494-dd54-4c9b-da64-09fcd48bedce" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "The value of almost_ten is 9.9\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "The second way is using the **`.format()`** method. Add this to the end of a string. Variables or values listed inside the parentheses, separated by commas, can be referred to in order using curly brackets: `{0}`, `{1}`, `{2}`, etc.:" + ], + "metadata": { + "id": "FuA4DfNKUa6z" + } + }, + { + "cell_type": "code", + "source": [ + "other_example = 'Some SURP interns will study the {0} and the {1}'.format(topic,topic_2)\n", + "print(other_example)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "8NhijODjUwrn", + "outputId": "64a49e1f-9300-46da-9303-6ca25f9fea66" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Some SURP interns will study the OCEAN and the ATMOSPHERE\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "## Part 3: Lists, 1-D arrays, indexing, and slicing" + ], + "metadata": { + "id": "aUDER1Q4Htyy" + } + }, + { + "cell_type": "markdown", + "source": [ + "To store multiple numbers, we can use **lists** or **NumPy arrays**. Lists and arrays are types of variables, and NumPy is one of the packages that we imported at the top of this notebook. 
Here's how we create a list or array:" + ], + "metadata": { + "id": "_u4V8X5zYWnc" + } + }, + { + "cell_type": "code", + "source": [ + "my_list = [1,2,3,4,5]" + ], + "metadata": { + "id": "DeEk5f6tGt1I" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "my_array = np.array([1,2,3,4,5,6,7,8,9])" + ], + "metadata": { + "id": "stU_2biAYpWF" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "print(my_list)\n", + "print(my_array)" + ], + "metadata": { + "id": "1ZZAFrtPYqTi", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "c825347f-573c-411e-80d8-668619a0052e" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[1, 2, 3, 4, 5]\n", + "[1 2 3 4 5 6 7 8 9]\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "You can add elements to the end of a list by **appending**. The syntax is:\n", + "\n", + "> **`list_name.append(NEW_ELEMENT)`**" + ], + "metadata": { + "id": "KF7f04zmPAva" + } + }, + { + "cell_type": "code", + "source": [ + "# Append to the list that you created earlier:\n", + "my_list.append(6)\n", + "my_list.append(7)\n", + "print(my_list)" + ], + "metadata": { + "id": "3l95QDNjPON5", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "9747bce5-c29f-4890-82fb-dca163dc788f" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[1, 2, 3, 4, 5, 6, 7]\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "You can convert a list to an array by putting it inside **`np.array()`**:" + ], + "metadata": { + "id": "lakCzdpAOyys" + } + }, + { + "cell_type": "code", + "source": [ + "converted = np.array(my_list)\n", + "\n", + "print(converted)" + ], + "metadata": { + "id": "qVHEFrDVO30V", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "4389d026-130b-488d-e066-fe11a7b7295b" + 
}, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[1 2 3 4 5 6 7]\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "Here's a big difference between the two: A list can store a combination of numbers and strings, while an array can store only one variable type (so just numbers, *or* just strings)." + ], + "metadata": { + "id": "tTFXh0wFH_WO" + } + }, + { + "cell_type": "code", + "source": [ + "combo_list = ['element #1', 2, 'element #3', 4]\n", + "print(combo_list)" + ], + "metadata": { + "id": "cNOev1VOH-rf", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "cec6b1b1-bca6-400b-d76c-0eed3f462518" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "['element #1', 2, 'element #3', 4]\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "Arrays allow us to do math. This is very useful!\n", + "\n", + "***Before running the cells below, what do you expect will be the result of each line of code?***" + ], + "metadata": { + "id": "55hydvn0YtqH" + } + }, + { + "cell_type": "code", + "source": [ + "my_array + 5" + ], + "metadata": { + "id": "eLmXjAhFYs8U", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "e26b4faf-28bc-4b31-9fc6-6006e0419dd2" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array([ 6, 7, 8, 9, 10, 11, 12, 13, 14])" + ] + }, + "metadata": {}, + "execution_count": 99 + } + ] + }, + { + "cell_type": "code", + "source": [ + "my_array * 2" + ], + "metadata": { + "id": "ob3atI21Y1WW", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "71c38fda-0a48-4ff5-c25b-2bbbf370d176" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array([ 2, 4, 6, 8, 10, 12, 14, 16, 18])" + ] + }, + "metadata": {}, + "execution_count": 
100 + } + ] + }, + { + "cell_type": "code", + "source": [ + "my_array + my_array" + ], + "metadata": { + "id": "185UbNiqY3Db", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "df80410e-d2e6-47ed-d041-98d62eb6a6ff" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array([ 2, 4, 6, 8, 10, 12, 14, 16, 18])" + ] + }, + "metadata": {}, + "execution_count": 101 + } + ] + }, + { + "cell_type": "code", + "source": [ + "np.array([50,100,150]) + np.array([1,1,1])" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "oF9a4UnUNZKD", + "outputId": "ec60ddfa-8afa-44c0-fb8f-382728af91c2" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array([ 51, 101, 151])" + ] + }, + "metadata": {}, + "execution_count": 102 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "***What happens when you add two lists together? 
Try it!***" + ], + "metadata": { + "id": "_V4cxdvBQwdy" + } + }, + { + "cell_type": "code", + "source": [ + "# Write your code here:\n", + "[1,2,3] + [4,5]" + ], + "metadata": { + "id": "SrKe3oWZQ1bg", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "3266fe80-eaff-4fde-e6a0-43c5185f50b5" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[1, 2, 3, 4, 5]" + ] + }, + "metadata": {}, + "execution_count": 103 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "***How about when you multiply a list with an integer?***" + ], + "metadata": { + "id": "yR6ZLdaecjju" + } + }, + { + "cell_type": "code", + "source": [ + "# Write your code here:\n", + "var = [1, 2, 3, 4] * 4\n", + "print(var)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "DbvXoXdYci6v", + "outputId": "1b24ab56-abee-430f-8703-795b50bf8d22" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4]\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "---\n", + "\n", + "### Indexing and slicing\n", + "\n", + "If we want to retrieve certain elements from a list or array, we need to count the position of the elements, which we call an **index**. More than one index are **indices**. In Python, **indices start at 0, not 1**. For example:\n", + "\n", + "* List: `['A', 'B', 'C', 'D', 'E', 'F', 'G']`\n", + "\n", + "* Indices: A = 0, B = 1, C = 2, D = 3, E = 4, F = 5, G = 6\n", + "\n", + "To extract the element, we can **index** or **slice** into the list or array using a bracket **[ ]** after the variable name:\n", + "\n", + "* Indexing: **`variable_name[INDEX]`**\n", + "* Slicing: **`variable_name[START_INDEX:END_INDEX]`**\n", + "\n", + "Note that when slicing, `END` is exclusive, so it is the index *after* the final element that you want. 
Also, both `START` and `END` are optional.\n", + "\n", + "***Run each cell below and think about why the results make sense:***" + ], + "metadata": { + "id": "oTg8kxr7GB1i" + } + }, + { + "cell_type": "code", + "source": [ + "year = [2,0,2,5]\n", + "print(year)" + ], + "metadata": { + "id": "VfMxSqQESQxF", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "83dc095f-c45f-457f-cc62-0ad989fe1a12" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[2, 0, 2, 5]\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "# Examples of indexing:\n", + "print(year[0])\n", + "print(year[3])\n", + "print(year[-1]) # This is pretty neat! Negative indexing counts backwards from the end" + ], + "metadata": { + "id": "31P9AAA63yxZ", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "521772e1-b8cb-4b06-d2c7-332f0a4b7114" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "2\n", + "5\n", + "5\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "# Examples of slicing starting from the first element:\n", + "print(year[0:4])\n", + "print(year[0:])" + ], + "metadata": { + "id": "HF2d3rOc3zD5", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "d1ec74d0-74a9-4089-8f77-fde562a3a86a" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[2, 0, 2, 5]\n", + "[2, 0, 2, 5]\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "# Examples of slicing to or from the 2nd element (at index #1):\n", + "print(year[:1])\n", + "print(year[1:])" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "w1CaXKAMfz01", + "outputId": "4b33e279-8af0-4d59-cce9-d5b4af3e81eb" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[2]\n", + "[0, 2, 5]\n" 
+ ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "***Can you find two different ways to extract the last two elements (`[2,5]`) of the variable `year`?***\n", + "\n", + "***Try using one of them to save (`[2,5]`) into a new variable.***" + ], + "metadata": { + "id": "UfnTZRSI5Q91" + } + }, + { + "cell_type": "code", + "source": [ + "# Write your code here:\n", + "\n", + "# Option 1:\n", + "year = np.array([2,0,2,5])\n", + "a = year[-2:]\n", + "print(a)\n", + "\n", + "# Option 2:\n", + "year = np.array([2,0,2,5])\n", + "a = year[2:]\n", + "print(a)" + ], + "metadata": { + "id": "9AtXnl7A5tL9", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "45b6ece5-f226-4ad9-ebc3-29af83f3abe2" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[2 5]\n", + "[2 5]\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "Similarly, you can use indexing or slicing to assign new values in specific elements in a list or array:" + ], + "metadata": { + "id": "fzu-AQ4pTbSZ" + } + }, + { + "cell_type": "code", + "source": [ + "print(year) # Before modifying last element\n", + "year[3] = 9\n", + "print(year) # After modifying last element" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "9jEogCoNQfTq", + "outputId": "d0ddb1db-578f-405b-9277-999e92b33e1f" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[2 0 2 5]\n", + "[2 0 2 9]\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "***What will `array_to_modify` be after the following assignments? 
Test your prediction by printing the variable below:***" + ], + "metadata": { + "id": "vlfG--UHT_pY" + } + }, + { + "cell_type": "code", + "source": [ + "array_to_modify = np.array([10,20,30,40,50])\n", + "array_to_modify[0] = 0\n", + "array_to_modify[1:4] = np.array([21,31,41])\n", + "array_to_modify[4] *= 2" + ], + "metadata": { + "id": "wvH6Lpb4Ti9d" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Write your code here:\n", + "print(array_to_modify)" + ], + "metadata": { + "id": "ZSBSfaHEUJCQ", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "c5447c8b-8aac-4097-cd28-88bbe8d0dfaa" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[ 0 21 31 41 100]\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "We can track changes in a variable over time by printing the variable after each change:" + ], + "metadata": { + "id": "SpH8r3Ys3Nyj" + } + }, + { + "cell_type": "code", + "source": [ + "array_to_modify = np.array([10,20,30,40,50])\n", + "print(array_to_modify)\n", + "\n", + "array_to_modify[0] = 0\n", + "print(array_to_modify)\n", + "\n", + "array_to_modify[1:4] = np.array([21,31,41])\n", + "print(array_to_modify)\n", + "\n", + "array_to_modify[4] *= 2\n", + "print(array_to_modify)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Xu30NiRChCl7", + "outputId": "6b41144d-f270-4b5d-9656-edc87addf310" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[10 20 30 40 50]\n", + "[ 0 20 30 40 50]\n", + "[ 0 21 31 41 50]\n", + "[ 0 21 31 41 100]\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "***What happens when you index or slice into a string? 
Try it!***" + ], + "metadata": { + "id": "fW9RymUp9st2" + } + }, + { + "cell_type": "code", + "source": [ + "my_string = 'projector'\n", + "\n", + "# Write your code here:\n", + "my_string[6:]" + ], + "metadata": { + "id": "CVt-kKZF90xq", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 140 + }, + "outputId": "c0418c58-001a-4982-b35f-ae0285112cda" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "'tor'" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + } + }, + "metadata": {}, + "execution_count": 114 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "You can use the function **`len()`** to get the length of a list, array, or string (we'll talk more about functions later):" + ], + "metadata": { + "id": "gVlew6ZSjoT9" + } + }, + { + "cell_type": "code", + "source": [ + "print(len(year))\n", + "print(len(my_string))" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "XxqggautjvEg", + "outputId": "a91e7aad-4120-4f1e-9c73-600848bd8d98" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "4\n", + "9\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "## Part 4: Debugging code" + ], + "metadata": { + "id": "N6kV_Lat6DBG" + } + }, + { + "cell_type": "markdown", + "source": [ + "It is completely normal to make mistakes when writing code. 
Finding the mistakes is the biggest challenge in programming.\n", + "\n", + "We refer to this process of finding and correcting mistakes, or \"bugs\", as **debugging**.\n", + "\n", + "This flowchart below offers some tips for where to start, depending on whether your code is generating an **error** or just failing to work silently:" + ], + "metadata": { + "id": "s_7bn1VD6Hv4" + } + }, + { + "cell_type": "markdown", + "source": [ + "\n", + "\n", + "*Image source: [pythonforbiologists.com](https://pythonforbiologists.com/29-common-beginner-errors-on-one-page.html)*" + ], + "metadata": { + "id": "_o0CAyL67IB2" + } + }, + { + "cell_type": "markdown", + "source": [ + "### Step 1: Start by reading your code, line by line" + ], + "metadata": { + "id": "FoWR4dj06zZO" + } + }, + { + "cell_type": "markdown", + "source": [ + "The best way to start debugging is almost always by reading your code carefully line-by-line to understand what is happening. This is known as the \"rubber duck method,\" and is explained below:" + ], + "metadata": { + "id": "S1Vt_wf_63bu" + } + }, + { + "cell_type": "markdown", + "source": [ + "\n", + "\n", + "*Image source: [rubberduckdebugging.com](https://rubberduckdebugging.com/)*" + ], + "metadata": { + "id": "87G_tlnF8anz" + } + }, + { + "cell_type": "markdown", + "source": [ + "### Step 2: Diagnose errors\n", + "\n", + "If your code generated an error, read the error. The error will say something like `SyntaxError` and will highlight the line of code that produced the error.\n", + "\n", + "The type of error should give you a good clue as to what went wrong. For example, a `SyntaxError` means that your code doesn't follow the correct syntax rules. It might be missing a parenthesis, a quote mark, or some other syntax-related issue.\n", + "\n", + "If you're not sure what the error means, then copy and paste the error line into Google search." 
+ ], + "metadata": { + "id": "LjvwVBGW8qMw" + } + }, + { + "cell_type": "markdown", + "source": [ + "### Step 3: Trace your code using `print` statements" + ], + "metadata": { + "id": "aNE10T9S9Pb8" + } + }, + { + "cell_type": "markdown", + "source": [ + "Sometimes it's hard to tell what your code is doing because you don't know what certain variables are doing.\n", + "\n", + "A useful debugging technique is to add `print()` statements throughout your code to **\"trace\"** how variables change as they get assigned and modified.\n", + "\n", + "You saw a demonstration of this technique earlier in the indexing and slicing section:" + ], + "metadata": { + "id": "rkkcX5bO9SAm" + } + }, + { + "cell_type": "code", + "source": [ + "array_to_modify = np.array([10,20,30,40,50])\n", + "print(array_to_modify)\n", + "\n", + "array_to_modify[0] = 0\n", + "print(array_to_modify)\n", + "\n", + "array_to_modify[1:4] = np.array([21,31,41])\n", + "print(array_to_modify)\n", + "\n", + "array_to_modify[4] *= 2\n", + "print(array_to_modify)" + ], + "metadata": { + "id": "PyuKCSmJ6HBg", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "13e129cc-29b0-49ec-8bf6-717a7bf34e2e" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[10 20 30 40 50]\n", + "[ 0 20 30 40 50]\n", + "[ 0 21 31 41 50]\n", + "[ 0 21 31 41 100]\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "### Step 4: Consult Google or ChatGPT\n", + "\n", + "If all else fails and you just can't figure out what is happening in your code, use Google or ChatGPT.\n", + "\n", + "Google will probably only be helpful if you have an error. But for the most challenging errors, Google is often better than ChatGPT. Usually a search will return useful question-and-answer threads on [StackOverflow.com](https://stackoverflow.com/questions).\n", + "\n", + "ChatGPT can sometimes interpret multiple lines of code, if you ask it to find a bug. 
But be aware that the solutions that ChatGPT offers may not be correct or efficient.\n", + "\n", + "No matter what solution you find, **make sure that you understand how and why the code works** before using it in your project." + ], + "metadata": { + "id": "m-zqS74H9y5q" + } + } + ] +} \ No newline at end of file diff --git a/site/lessons/APL_SURP_Python_course_Notebook_1_blank_copy.ipynb b/site/lessons/APL_SURP_Python_course_Notebook_1_blank_copy.ipynb new file mode 100644 index 0000000..04c50e9 --- /dev/null +++ b/site/lessons/APL_SURP_Python_course_Notebook_1_blank_copy.ipynb @@ -0,0 +1,1253 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [], + "collapsed_sections": [ + "J2FPr4LV86OO", + "aUDER1Q4Htyy", + "N6kV_Lat6DBG" + ] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# **APL SURP Python course** - Notebook 1 (blank version)\n", + "### ***Introduction to Python, variables, lists, 1-D arrays, and debugging***\n", + "\n", + "*Created for the University of Washington Applied Physics Laboratory's Summer Undergraduate Research Program (SURP) 2025.*" + ], + "metadata": { + "id": "OxvLAQ1SWpeR" + } + }, + { + "cell_type": "markdown", + "source": [ + "For additional resources on Python basics, you can consult the following resources on the APL-SURP Python course website:\n", + "* Tutorials on Python fundamentals: https://uw-apl-surp.github.io/aplsurp-python/overview.html\n", + "* Complementary lessons on specific Python topics: https://uw-apl-surp.github.io/aplsurp-python/complementary_lessons.html" + ], + "metadata": { + "id": "17Wn8Uio3ea6" + } + }, + { + "cell_type": "markdown", + "source": [ + "## Part 1: Python and notebooks" + ], + "metadata": { + "id": "UszPR4dNY8sW" + } + }, + { + "cell_type": "markdown", + "source": [ + "**Computer code** allows us to work with data, create 
visualizations, and create repeatable scientific workflows. It is an integral part of the modern scientific method!\n", + "\n", + "Every programming language has a specific **syntax**. In English as well as programming languages, syntax describes valid combinations of symbols and words:\n", + "* Syntactically invalid: \"boy dog cat\"\n", + "* Syntactically valid: \"boy hugs cat\"\n", + "* Syntactically valid (but **semantically** invalid): \"cat hugs boy\"\n", + "\n", + "**Semantics** refer to whether a phrase has meaning. It's up to us to write computer code that has scientific meaning and is useful. The computer will allow us to write code that is syntactically valid but semantically – or scientifically – incorrect!\n", + "\n", + "\n", + "---" + ], + "metadata": { + "id": "stif3BkqXGuD" + } + }, + { + "cell_type": "markdown", + "source": [ + "" + ], + "metadata": { + "id": "Djp9pEel9qA6" + } + }, + { + "cell_type": "markdown", + "source": [ + "(*Image source: [tiobe.com](https://www.tiobe.com/tiobe-index/)*)\n", + "\n", + "As of 2025, Python is likely the most widely-used programming language in the world, and its popularity continues to rise above other languages." + ], + "metadata": { + "id": "E752-6589-dV" + } + }, + { + "cell_type": "markdown", + "source": [ + "" + ], + "metadata": { + "id": "lXb-zwLyIJ_i" + } + }, + { + "cell_type": "markdown", + "source": [ + "No programming language is perfect. 
As the inventor of C++ once said, *“There are only two kinds of programming languages: the ones people complain about and the ones nobody uses.”*\n", + "\n", + "However, there are many reasons that we use Python instead of other programming languages, like MATLAB, Java, or C:\n", + "- It's free!\n", + "- It reads a bit like written English, so it's easier to write and understand\n", + "- It's old, so it's very stable (Python was created in 1991)\n", + "- It can do almost anything\n", + "- It's incredibly popular inside and outside of science (so it could help you land a job)\n", + "- It's open source, which means anyone can help to improve it\n", + "\n", + "For more discussion of why Python is valuable and why its open source origin is important, see the APL-SURP Python course website: https://uw-apl-surp.github.io/aplsurp-python/python.html.\n", + "\n", + "***Question: How many of you have heard of Python before this course? Who has written code in Python before? Keeping your hands up, who has written code in any language before?***" + ], + "metadata": { + "id": "uKH9xnsPIJSB" + } + }, + { + "cell_type": "markdown", + "source": [ + "" + ], + "metadata": { + "id": "LmkHnoGLJcAi" + } + }, + { + "cell_type": "markdown", + "source": [ + "This web page is called a **Jupyter notebook**. It lets us write and run computer code, and the results get displayed and saved alongside the code.\n", + "\n", + "In a Jupyter notebook, you can mix and match **code cells** and **text cells**. Text cells don't get interpreted as Python code. You can double-click on a text cell to edit it.\n", + "\n", + "***Try creating a new text cell using the menu button (+TEXT), write a message in it, then delete the cell.***\n", + "\n", + "Just like in a Google Doc, you can add comments to a cell using the button at the top right of the cell. 
***Try this!***" + ], + "metadata": { + "id": "YkJewcBFh3eR" + } + }, + { + "cell_type": "markdown", + "source": [ + "" + ], + "metadata": { + "id": "TzrsyTR8KlFl" + } + }, + { + "cell_type": "markdown", + "source": [ + "***Question: When we run Python code in this notebook, where is the code actually being run?***\n", + "\n", + "**Google Colab** (short for Colaboratory) is an online platform that lets you create and execute Jupyter notebooks from your web browser, like Chrome or Safari. Colab is free to use.\n", + "\n", + "When you create a new Colab notebook, it is stored in your Google Drive account. You can share Colab notebooks just like you would share a Google Doc using the \"Share\" menu at the top right of the page.\n", + "\n", + "You can download this notebook to your computer using the File menu -> Download. The file extension will be `.ipynb`. You can run Jupyter notebooks on your computer (what we would call **\"running locally\"**) if you have Python and key packages installed. If you wish to install and run Python locally, the APL-SURP Python course website offers information on doing so: https://uw-apl-surp.github.io/aplsurp-python/install-run.html.\n", + "\n", + "Sometimes it makes more sense to create a **Python script** instead of a Jupyter notebook. Scripts are plain code files that run from top to bottom, and they don't save the output. Their file extension is `.py`." + ], + "metadata": { + "id": "trglTsK2KqlH" + } + }, + { + "cell_type": "markdown", + "source": [ + "---\n", + "\n", + "### Getting started with coding\n", + "\n", + "First, we always have to load **packages** into the notebook using the `import` command! Packages contain additional **functions** that allow us to get more stuff done.\n", + "\n", + "To run a coding cell, you can click the \"play\" button or type `Shift`-`Enter` (PC) or `Shift`-`Return` (Mac) on your keyboard.
***Try this with the cell below:***" + ], + "metadata": { + "id": "db2A18q6WXtQ" + } + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "-9O6SthNqtT8" + }, + "outputs": [], + "source": [ + "import numpy as np # NumPy is an array and math library\n", + "import matplotlib.pyplot as plt # Matplotlib is a visualization (plotting) library\n", + "import pandas as pd # Pandas lets us work with spreadsheet (.csv) data\n", + "from datetime import datetime, timedelta # Datetime helps us work with dates and times" + ] + }, + { + "cell_type": "markdown", + "source": [ + "When we write `import numpy as np`, we are telling Python: \"import the package NumPy and we will access it using the abbreviation `np` from here onwards.\" You could technically use any abbreviation, but `np` is standard for NumPy." + ], + "metadata": { + "id": "vmaVXRWMAxMO" + } + }, + { + "cell_type": "code", + "source": [ + "# This is a comment. Nothing happens when this cell is executed!" + ], + "metadata": { + "id": "9SMheEZQO57M" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "Often we'd like to add notes to our code. You can do this using **Python comments** (which are not the same as Google Colab comments).\n", + "\n", + "Python comments are notated using a `#` (hash) symbol. Everything after the `#` is ignored and not treated like code.\n", + "\n", + "***Can you add a Python comment to the code cell above, then execute the cell? What happens?***" + ], + "metadata": { + "id": "_b8DR4MoAlCW" + } + }, + { + "cell_type": "markdown", + "source": [ + "## Part 2: Variables, math, and string formatting" + ], + "metadata": { + "id": "J2FPr4LV86OO" + } + }, + { + "cell_type": "markdown", + "source": [ + "We can use Python as a calculator. 
Run the cell below:" + ], + "metadata": { + "id": "ESXwBOoGW8qS" + } + }, + { + "cell_type": "code", + "source": [ + "3 + 9" + ], + "metadata": { + "id": "5LUJIIQ6XWi0" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "2 + 5" + ], + "metadata": { + "id": "_o7WAAKpDxIP" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "Note that parentheses can be used to change the order of operations:" + ], + "metadata": { + "id": "SOh05mH6EpLI" + } + }, + { + "cell_type": "code", + "source": [ + "1 + 2 * 3 + 4" + ], + "metadata": { + "id": "P06zR16eEi3O" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "(1 + 2) * (3 + 4)" + ], + "metadata": { + "id": "tLOCMyTBEvck" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "If Python doesn't recognize the code, it will give an **error**.\n", + "\n", + "***Run the code below. What helpful information does the resulting error message include?***" + ], + "metadata": { + "id": "s5D32op-iCGK" + } + }, + { + "cell_type": "code", + "source": [ + "# Uncomment the line below to run:\n", + "# 3 + hello" + ], + "metadata": { + "id": "uCPfRriciBXp" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "***Can you figure out how to multiply and divide numbers? Try doing some math yourself below.***" + ], + "metadata": { + "id": "P3n99f5yXZPs" + } + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "PmQitlaNXfKF" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "e609gBCwRNrc" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "Usually, Python needs to be told when to \"print\" something to the screen. 
For this, we use the **`print()` function**:" + ], + "metadata": { + "id": "B3kVB6JVXksP" + } + }, + { + "cell_type": "code", + "source": [ + "print(\"Hello world!\")" + ], + "metadata": { + "id": "PQeI0aJbXstQ" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "Notice how the function requires a set of parentheses, `( )`, which follow immediately after the name of the function (`print`).\n", + "\n", + "***Try writing code to print a different message:***" + ], + "metadata": { + "id": "gnjpRXeOiZsz" + } + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "WGWdyQjEibZE" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "Note how comments are used in two ways below, both to describe a section of code and to annotate a specific line:" + ], + "metadata": { + "id": "qetc0zzL13rG" + } + }, + { + "cell_type": "code", + "source": [ + "# This is a section comment\n", + "print('This is not a comment')\n", + "print('This is also not a comment') # This is a line comment" + ], + "metadata": { + "id": "CWCYvrgX2IX9" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "In Python, we use **variables** to store information. Variables can be numbers (**integers** or **floats**), combinations of characters (called **strings**), a **boolean** (which are either True or False), or other variables that are generally called \"**objects**\".\n", + "\n", + "To save (or **\"assign\"**) a variable, we use the equal sign (`=`). You can name your variable anything descriptive, as long as it's one word! Note that underscore (`_`) can be used to join words in a variable name." 
+ ], + "metadata": { + "id": "2yuXcWy5XxWX" + } + }, + { + "cell_type": "code", + "source": [ + "a = -5 # This variable is an \"integer\" because it is a whole number (a number without a decimal point)\n", + "almost_ten = 9.9 # This variable is a \"float\" because it is a floating point number (a number with a decimal point)\n", + "scientific = 2e3 # This variable is also a float, and is written in scientific notation: 2.0 x 10^3 = 2000\n", + "\n", + "topic = 'OCEAN' # This variable is called a string\n", + "topic_2 = \"ATMOSPHERE\" # You can also specify strings using double quotation marks\n", + "\n", + "this_is_a_boolean = True # This variable is a boolean" + ], + "metadata": { + "id": "OGHwCzCiYOiv" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "print(a)" + ], + "metadata": { + "id": "p-6FzKFwYNJ9" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "print(almost_ten)" + ], + "metadata": { + "id": "47-LhFOaYQ0m" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "print(scientific)" + ], + "metadata": { + "id": "XumLM8cKGAiC" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "print(topic)\n", + "print(topic_2)" + ], + "metadata": { + "id": "qD3PPGarYXdF" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "print(this_is_a_boolean)" + ], + "metadata": { + "id": "Hegia9C2GdUw" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "You can do math or other operations on the same line where you create a variable!"
+ ], + "metadata": { + "id": "0rlqnIOZ9NlL" + } + }, + { + "cell_type": "code", + "source": [ + "result = 2025 - 1915\n", + "print(result)" + ], + "metadata": { + "id": "PuWeV09m9VA_" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "You can also change a variable using this compact notation:\n", + "* `a += b` is the same as `a = a + b`\n", + "* `a -= b` is the same as `a = a - b`\n", + "* `a *= b` is the same as `a = a * b`" + ], + "metadata": { + "id": "4KD9Yq3lFHH2" + } + }, + { + "cell_type": "code", + "source": [ + "result += 50\n", + "print(result)" + ], + "metadata": { + "id": "zPlOmwLpFcu9" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "***Try the following:***\n", + "1. ***Search on Google for the formula to convert Fahrenheit to Celsius.***\n", + "2. ***Save a variable with the current Seattle temperature in Fahrenheit (feel free to guess, or look it up).***\n", + "3. ***In one line, create a new variable with that temperature converted into Celsius using the math formula.***\n", + "4. ***Print the result!***" + ], + "metadata": { + "id": "OBmYHJ93MZO1" + } + }, + { + "cell_type": "code", + "source": [ + "# Write your code here:\n" + ], + "metadata": { + "id": "oV3vRYprMont" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "Note that Python treats booleans (`True` and `False`) like the integers 1 and 0, respectively. ***This means you can do math with booleans. What will the code produce below, and why?***" + ], + "metadata": { + "id": "aIjuN0miGoUt" + } + }, + { + "cell_type": "code", + "source": [ + "print((False * 5) + (True * 3))" + ], + "metadata": { + "id": "pIt2B0QQG6TX" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "***What happens when you add two strings together? 
Try it below.***" + ], + "metadata": { + "id": "1rp4Jnh27cgX" + } + }, + { + "cell_type": "code", + "source": [ + "# Write your code here:\n" + ], + "metadata": { + "id": "K1CmVOqj7hYP" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "You can also create strings that include variables! Here are two ways:\n", + "\n", + "The first way is called an **f-string**, for \"formatted string\". To create one, start the string with the letter `f` and embed the variables inside using curly brackets (`{...}`):" + ], + "metadata": { + "id": "Fn9m6TiyTpoy" + } + }, + { + "cell_type": "code", + "source": [ + "example_f_str = f'The value of almost_ten is {almost_ten}'\n", + "print(example_f_str)" + ], + "metadata": { + "id": "gnMVlfr1TomG" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "The second way is using the **`.format()`** method. Add this to the end of a string. Variables or values listed inside the parentheses, separated by commas, can be referred to in order using curly brackets: `{0}`, `{1}`, `{2}`, etc.:" + ], + "metadata": { + "id": "FuA4DfNKUa6z" + } + }, + { + "cell_type": "code", + "source": [ + "other_example = 'Some SURP interns will study the {0} and the {1}'.format(topic,topic_2)\n", + "print(other_example)" + ], + "metadata": { + "id": "8NhijODjUwrn" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "## Part 3: Lists, 1-D arrays, indexing, and slicing" + ], + "metadata": { + "id": "aUDER1Q4Htyy" + } + }, + { + "cell_type": "markdown", + "source": [ + "To store multiple numbers, we can use **lists** or **NumPy arrays**. Lists and arrays are types of variables, and NumPy is one of the packages that we imported at the top of this notebook.
Here's how we create a list or array:" + ], + "metadata": { + "id": "_u4V8X5zYWnc" + } + }, + { + "cell_type": "code", + "source": [ + "my_list = [1,2,3,4,5]" + ], + "metadata": { + "id": "DeEk5f6tGt1I" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "my_array = np.array([1,2,3,4,5,6,7,8,9])" + ], + "metadata": { + "id": "stU_2biAYpWF" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "print(my_list)\n", + "print(my_array)" + ], + "metadata": { + "id": "1ZZAFrtPYqTi" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "You can add elements to the end of a list by **appending**. The syntax is:\n", + "\n", + "> **`list_name.append(NEW_ELEMENT)`**" + ], + "metadata": { + "id": "KF7f04zmPAva" + } + }, + { + "cell_type": "code", + "source": [ + "# Append to the list that you created earlier:\n", + "my_list.append(6)\n", + "my_list.append(7)\n", + "print(my_list)" + ], + "metadata": { + "id": "3l95QDNjPON5" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "You can convert a list to an array by putting it inside **`np.array()`**:" + ], + "metadata": { + "id": "lakCzdpAOyys" + } + }, + { + "cell_type": "code", + "source": [ + "converted = np.array(my_list)\n", + "\n", + "print(converted)" + ], + "metadata": { + "id": "qVHEFrDVO30V" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "Here's a big difference between the two: A list can store a combination of numbers and strings, while an array can store only one variable type (so just numbers, *or* just strings)." 
+ ], + "metadata": { + "id": "tTFXh0wFH_WO" + } + }, + { + "cell_type": "code", + "source": [ + "combo_list = ['element #1', 2, 'element #3', 4]\n", + "print(combo_list)" + ], + "metadata": { + "id": "cNOev1VOH-rf" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "Arrays allow us to do math. This is very useful!\n", + "\n", + "***Before running the cells below, what do you expect will be the result of each line of code?***" + ], + "metadata": { + "id": "55hydvn0YtqH" + } + }, + { + "cell_type": "code", + "source": [ + "my_array + 5" + ], + "metadata": { + "id": "eLmXjAhFYs8U" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "my_array * 2" + ], + "metadata": { + "id": "ob3atI21Y1WW" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "my_array + my_array" + ], + "metadata": { + "id": "185UbNiqY3Db" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "np.array([50,100,150]) + np.array([1,1,1])" + ], + "metadata": { + "id": "oF9a4UnUNZKD" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "***What happens when you add two lists together? 
Try it!***" + ], + "metadata": { + "id": "_V4cxdvBQwdy" + } + }, + { + "cell_type": "code", + "source": [ + "# Write your code here:\n" + ], + "metadata": { + "id": "SrKe3oWZQ1bg" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "***How about when you multiply a list with an integer?***" + ], + "metadata": { + "id": "yR6ZLdaecjju" + } + }, + { + "cell_type": "code", + "source": [ + "# Write your code here:\n" + ], + "metadata": { + "id": "DbvXoXdYci6v" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "---\n", + "\n", + "### Indexing and slicing\n", + "\n", + "If we want to retrieve certain elements from a list or array, we need to count the position of the elements, which we call an **index**. More than one index are **indices**. In Python, **indices start at 0, not 1**. For example:\n", + "\n", + "* List: `['A', 'B', 'C', 'D', 'E', 'F', 'G']`\n", + "\n", + "* Indices: A = 0, B = 1, C = 2, D = 3, E = 4, F = 5, G = 6\n", + "\n", + "To extract the element, we can **index** or **slice** into the list or array using a bracket **[ ]** after the variable name:\n", + "\n", + "* Indexing: **`variable_name[INDEX]`**\n", + "* Slicing: **`variable_name[START_INDEX:END_INDEX]`**\n", + "\n", + "Note that when slicing, `END` is exclusive, so it is the index *after* the final element that you want. Also, either `START` or `END` are optional.\n", + "\n", + "***Run each cell below and think about why the results make sense:***" + ], + "metadata": { + "id": "oTg8kxr7GB1i" + } + }, + { + "cell_type": "code", + "source": [ + "year = [2,0,2,5]\n", + "print(year)" + ], + "metadata": { + "id": "VfMxSqQESQxF" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Examples of indexing:\n", + "print(year[0])\n", + "print(year[3])\n", + "print(year[-1]) # This is pretty neat! 
Negative indexing counts backwards from the end" + ], + "metadata": { + "id": "31P9AAA63yxZ" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Examples of slicing starting from the first element:\n", + "print(year[0:4])\n", + "print(year[0:])" + ], + "metadata": { + "id": "HF2d3rOc3zD5" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Examples of slicing to or from the 2nd element (at index #1):\n", + "print(year[:1])\n", + "print(year[1:])" + ], + "metadata": { + "id": "w1CaXKAMfz01" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "***Can you find two different ways to extract the last two elements (`[2,5]`) of the variable `year`?***\n", + "\n", + "***Try using one of them to save (`[2,5]`) into a new variable.***" + ], + "metadata": { + "id": "UfnTZRSI5Q91" + } + }, + { + "cell_type": "code", + "source": [ + "# Write your code here:\n" + ], + "metadata": { + "id": "9AtXnl7A5tL9" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "Similarly, you can use indexing or slicing to assign new values in specific elements in a list or array:" + ], + "metadata": { + "id": "fzu-AQ4pTbSZ" + } + }, + { + "cell_type": "code", + "source": [ + "print(year) # Before modifying last element\n", + "year[3] = 9\n", + "print(year) # After modifying last element" + ], + "metadata": { + "id": "9jEogCoNQfTq" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "***What will `array_to_modify` be after the following assignments? 
Test your prediction by printing the variable below:***" + ], + "metadata": { + "id": "vlfG--UHT_pY" + } + }, + { + "cell_type": "code", + "source": [ + "array_to_modify = np.array([10,20,30,40,50])\n", + "array_to_modify[0] = 0\n", + "array_to_modify[1:4] = np.array([21,31,41])\n", + "array_to_modify[4] *= 2" + ], + "metadata": { + "id": "wvH6Lpb4Ti9d" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Write your code here:\n" + ], + "metadata": { + "id": "ZSBSfaHEUJCQ" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "We can track changes in a variable over time by printing the variable after each change:" + ], + "metadata": { + "id": "mFQSEuMe3Dt8" + } + }, + { + "cell_type": "code", + "source": [ + "array_to_modify = np.array([10,20,30,40,50])\n", + "print(array_to_modify)\n", + "\n", + "array_to_modify[0] = 0\n", + "print(array_to_modify)\n", + "\n", + "array_to_modify[1:4] = np.array([21,31,41])\n", + "print(array_to_modify)\n", + "\n", + "array_to_modify[4] *= 2\n", + "print(array_to_modify)" + ], + "metadata": { + "id": "Xu30NiRChCl7" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "***What happens when you index or slice into a string? 
Try it!***" + ], + "metadata": { + "id": "fW9RymUp9st2" + } + }, + { + "cell_type": "code", + "source": [ + "my_string = 'projector'\n", + "\n", + "# Write your code here:\n" + ], + "metadata": { + "id": "CVt-kKZF90xq" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "You can use the function **`len()`** to get the length of a list, array, or string (we'll talk more about functions later):" + ], + "metadata": { + "id": "gVlew6ZSjoT9" + } + }, + { + "cell_type": "code", + "source": [ + "print(len(year))\n", + "print(len(my_string))" + ], + "metadata": { + "id": "XxqggautjvEg" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "## Part 4: Debugging code" + ], + "metadata": { + "id": "N6kV_Lat6DBG" + } + }, + { + "cell_type": "markdown", + "source": [ + "It is completely normal to make mistakes when writing code. Finding the mistakes is the biggest challenge in programming.\n", + "\n", + "We refer to this process of finding and correcting mistakes, or \"bugs\", as **debugging**.\n", + "\n", + "This flowchart below offers some tips for where to start, depending on whether your code is generating an **error** or just failing to work silently:" + ], + "metadata": { + "id": "s_7bn1VD6Hv4" + } + }, + { + "cell_type": "markdown", + "source": [ + "\n", + "\n", + "*Image source: [pythonforbiologists.com](https://pythonforbiologists.com/29-common-beginner-errors-on-one-page.html)*" + ], + "metadata": { + "id": "_o0CAyL67IB2" + } + }, + { + "cell_type": "markdown", + "source": [ + "### Step 1: Start by reading your code, line by line" + ], + "metadata": { + "id": "FoWR4dj06zZO" + } + }, + { + "cell_type": "markdown", + "source": [ + "The best way to start debugging is almost always by reading your code carefully line-by-line to understand what is happening. 
This is known as the \"rubber duck method,\" and is explained below:" + ], + "metadata": { + "id": "S1Vt_wf_63bu" + } + }, + { + "cell_type": "markdown", + "source": [ + "\n", + "\n", + "*Image source: [rubberduckdebugging.com](https://rubberduckdebugging.com/)*" + ], + "metadata": { + "id": "87G_tlnF8anz" + } + }, + { + "cell_type": "markdown", + "source": [ + "### Step 2: Diagnose errors\n", + "\n", + "If your code generated an error, read the error. The error will say something like `SyntaxError` and will highlight the line of code that produced the error.\n", + "\n", + "The type of error should give you a good clue as to what went wrong. For example, a `SyntaxError` means that your code doesn't follow the correct syntax rules. It might be missing a parenthesis, a quote mark, or some other syntax-related issue.\n", + "\n", + "If you're not sure what the error means, then copy and paste the error line into Google search." + ], + "metadata": { + "id": "LjvwVBGW8qMw" + } + }, + { + "cell_type": "markdown", + "source": [ + "### Step 3: Trace your code using `print` statements" + ], + "metadata": { + "id": "aNE10T9S9Pb8" + } + }, + { + "cell_type": "markdown", + "source": [ + "Sometimes it's hard to tell what your code is doing because you don't know what certain variables are doing.\n", + "\n", + "A useful debugging technique is to add `print()` statements throughout your code to **\"trace\"** how variables change as they get assigned and modified.\n", + "\n", + "You saw a demonstration of this technique earlier in the indexing and slicing section:" + ], + "metadata": { + "id": "rkkcX5bO9SAm" + } + }, + { + "cell_type": "code", + "source": [ + "array_to_modify = np.array([10,20,30,40,50])\n", + "print(array_to_modify)\n", + "\n", + "array_to_modify[0] = 0\n", + "print(array_to_modify)\n", + "\n", + "array_to_modify[1:4] = np.array([21,31,41])\n", + "print(array_to_modify)\n", + "\n", + "array_to_modify[4] *= 2\n", + "print(array_to_modify)" + ], + "metadata": { + 
"id": "PyuKCSmJ6HBg", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "13e129cc-29b0-49ec-8bf6-717a7bf34e2e" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[10 20 30 40 50]\n", + "[ 0 20 30 40 50]\n", + "[ 0 21 31 41 50]\n", + "[ 0 21 31 41 100]\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "### Step 4: Consult Google or ChatGPT\n", + "\n", + "If all else fails and you just can't figure out what is happening in your code, use Google or ChatGPT.\n", + "\n", + "Google will probably only be helpful if you have an error. But for the most challenging errors, Google is often better than ChatGPT. Usually a search will return useful question-and-answer threads on [StackOverflow.com](https://stackoverflow.com/questions).\n", + "\n", + "ChatGPT can sometimes interpret multiple lines of code, if you ask it to find a bug. But be aware that the solutions that ChatGPT offers may not be correct or efficient.\n", + "\n", + "No matter what solution you find, **make sure that you understand how and why the code works** before using it in your project." 
+ ], + "metadata": { + "id": "m-zqS74H9y5q" + } + } + ] +} \ No newline at end of file diff --git a/site/lessons/APL_SURP_Python_course_Notebook_2.ipynb b/site/lessons/APL_SURP_Python_course_Notebook_2.ipynb new file mode 100644 index 0000000..7ae78be --- /dev/null +++ b/site/lessons/APL_SURP_Python_course_Notebook_2.ipynb @@ -0,0 +1,2107 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [], + "collapsed_sections": [ + "0d7WUudK97z_", + "F31svxgPNP_t", + "zyTGovhuH1_p", + "ESibLGhhRp8n", + "tYzchKpM7DlA", + "c4IvRKVyvJwu" + ] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# **APL SURP Python course** - Notebook 2 (completed version)\n", + "### ***2-D arrays, tabular data, datetimes, and more***\n", + "\n", + "*Created for the University of Washington Applied Physics Laboratory's Summer Undergraduate Research Program (SURP) 2025.*" + ], + "metadata": { + "id": "OxvLAQ1SWpeR" + } + }, + { + "cell_type": "markdown", + "source": [ + "For additional resources on Python basics, you can consult the following resources on the APL-SURP Python course website:\n", + "* Tutorials on Python fundamentals: https://uw-apl-surp.github.io/aplsurp-python/overview.html\n", + "* Complementary lessons on specific Python topics: https://uw-apl-surp.github.io/aplsurp-python/complementary_lessons.html" + ], + "metadata": { + "id": "17Wn8Uio3ea6" + } + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "-9O6SthNqtT8" + }, + "outputs": [], + "source": [ + "import numpy as np # NumPy is an array and math library\n", + "import matplotlib.pyplot as plt # Matplotlib is a visualization (plotting) library\n", + "import pandas as pd # Pandas lets us work with spreadsheet (.csv) data\n", + "from datetime import datetime, timedelta # Datetime helps us work with dates and times" + ] + }, + { + 
"cell_type": "markdown", + "source": [ + "## Warm-up: Debugging activity" + ], + "metadata": { + "id": "0d7WUudK97z_" + } + }, + { + "cell_type": "markdown", + "source": [ + "The following code contains numerous mistakes — at least 10 unique ones. ***Can you fix them all so that the code runs and makes sense?***\n", + "\n", + "To keep track of your fixes, add a comment (`#`) at the end of lines you've changed and note what you modified." + ], + "metadata": { + "id": "g1wPIafc-BOI" + } + }, + { + "cell_type": "code", + "source": [ + "# Here are the Beatles' names\n", + "1st_beatle = 'John'\n", + "2nd_beatle = 'Paul\n", + "3rd_beatle = 'George\"\n", + "4th_beatle = 'Ringo'\n", + "\n", + "# Here are the Beatles' ages, in order of their names\n", + "ages = ['23','21','20','23']\n", + "\n", + "# This is the age range of the Beatles (= oldest age minus youngest age)\n", + "age range = age[1] - age[3]\n", + "\n", + "# Here are some print statements\n", + "print('The Beatles were (ages[1]), (ages[2]), (ages[3]), 'and' (ages[4]) when they arrived in America.')\n", + "print('The youngest Beatle's name was' + 3rd_beatle + '.')" + ], + "metadata": { + "id": "O67SjAqe-XmJ" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Copy the code from above and fix it below:\n", + "\n", + "# Here are the Beatles' names\n", + "beatle1 = 'John' # changed variable names to not begin with numbers\n", + "beatle2 = 'Paul' # added missing closing quote mark\n", + "beatle3 = 'George' # changed closing double quote mark to single quotes to match\n", + "beatle4 = 'Ringo'\n", + "\n", + "# Here are the Beatles' ages, in order of their names\n", + "ages = [23,21,20,23] # changed from strings to integers to allow mathematical operations\n", + "\n", + "# This is the age range of the Beatles (= oldest age minus youngest age)\n", + "age_range = ages[0] - ages[2] # fixed variable name typo (ages, not age)\n", + " # added underscore in variable name 
(age_range)\n", + " # changed indexing to start at 0, not 1\n", + "\n", + "# Here are some print statements\n", + "print(f'The Beatles were {ages[0]}, {ages[1]}, {ages[2]}, and {ages[3]} when they arrived in America.')\n", + " # changed indexing to start at 0, not 1\n", + " # added 'f' to start of formatted string\n", + " # changed parentheses around variables to curly brackets\n", + " # removed interior quotation marks\n", + "\n", + "print(\"The youngest Beatle's name was \" + beatle3 + '.') # updated variable name; added space\n", + " # option 1: changed to double quotes\n", + "print('The youngest Beatle\\'s name was ' + beatle3 + '.') # option 2: \"escaped\" the quote mark using back slash" + ], + "metadata": { + "id": "UeacsQXc-qFk", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "67d25fe0-0936-4bbe-f207-bc95c5ddc4a9" + }, + "execution_count": 13, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "The Beatles were 23, 21, 20, and 23 when they arrived in America.\n", + "The youngest Beatle's name was George.\n", + "The youngest Beatle's name was George.\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "## Part 1: 2-D arrays" + ], + "metadata": { + "id": "F31svxgPNP_t" + } + }, + { + "cell_type": "markdown", + "source": [ + "\n", + "\n", + "*Image source: [digitalearthafrica.org](https://training.digitalearthafrica.org/en/latest/python_basics/02_numpy.html)*" + ], + "metadata": { + "id": "3OOs6KssCtBW" + } + }, + { + "cell_type": "markdown", + "source": [ + "NumPy arrays can also be **two-dimensional** (or higher dimensions). Whoa!\n", + "\n", + "This allows us to represent data on multiple **axes**. Arrays can also be 3-D, 4-D, or higher-dimensional!\n", + "\n", + "2-D arrays can be defined using nested brackets: **[ [ ], [ ], [ ], etc. ]**. Below, I've created a 2-D NumPy array where each row holds the average monthly temperatures for one city, and each column is a different month. 
I found the data for [New York, NY](https://en.climate-data.org/north-america/united-states-of-america/new-york/new-york-1091/#climate-table) (top row - index 0) and [Seattle, WA](https://en.climate-data.org/north-america/united-states-of-america/washington/seattle-593/#climate-table) (bottom row - index 1) on [climate-data.org](https://en.climate-data.org/)." + ], + "metadata": { + "id": "f-vWngOeHAP7" + } + }, + { + "cell_type": "code", + "source": [ + "temp = np.array([[30.3,32.0,39.4,50.8,60.9,70.3,76.0,74.5,68.1,56.6,45.8,36.5], # (New York; temperatures in °F)\n", + " [40.0,40.6,44.2,48.4,54.9,60.2,66.2,66.7,60.5,52.0,44.5,39.6]]) # (Seattle)\n", + "\n", + "print(temp)" + ], + "metadata": { + "id": "UtAc_AUKHFZC", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "6b9af132-9c83-47b0-8805-98d989b1c496" + }, + "execution_count": 3, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[[30.3 32. 39.4 50.8 60.9 70.3 76. 74.5 68.1 56.6 45.8 36.5]\n", + " [40. 40.6 44.2 48.4 54.9 60.2 66.2 66.7 60.5 52. 44.5 39.6]]\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "Just like `len()` gives the length of a 1-D array, the command **`.shape`** (a property, not a function!) gives the dimensions of a 2-D (or 3-D, 4-D, etc.) 
array:" + ], + "metadata": { + "id": "3MpPjhtuknQg" + } + }, + { + "cell_type": "code", + "source": [ + "temp.shape # returns: (number of rows, number of columns)" + ], + "metadata": { + "id": "jOqkONCIkwpS", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "ffea1265-fbe6-4037-be32-b5b8b10ee734" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(2, 12)" + ] + }, + "metadata": {}, + "execution_count": 117 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "**Axis 0** runs down the rows (it is the row index) and **axis 1** runs across the columns (it is the column index).\n", + "\n", + "We still index and slice into 2-D arrays using brackets, but now we need to use a comma (`,`) to separate the indices for each dimension:\n", + "\n", + "> **`array_name[ROW_INDEX, COLUMN_INDEX]`**\n", + "\n", + "So if we want to get the temperature in New York (row index 0) in June (month #6 = Python column index 5), we would write:" + ], + "metadata": { + "id": "foHcxcjTJFgo" + } + }, + { + "cell_type": "code", + "source": [ + "print(temp[0,5])" + ], + "metadata": { + "id": "8QsmFBS_JFAW", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "5f1080b8-715f-499e-913d-2c4743a3f161" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "70.3\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "***Use indexing to retrieve the December average temperature in Seattle. 
Print your result:***" + ], + "metadata": { + "id": "HvY3DkuCLGaK" + } + }, + { + "cell_type": "code", + "source": [ + "# Write your code below\n", + "print(temp[1,11])" + ], + "metadata": { + "id": "P7Ki5VVqLMY-", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "2bb27293-e51d-4be5-80b3-79225f2b1b3f" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "39.6\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "Slicing works the same way. Instead of a single row or column index, use a range of indices:\n", + "\n", + "> **`array_name[ROW_START:ROW_END, COLUMN_START:COLUMN_END]`**\n", + "\n", + "To get all the elements along a certain axis, just use a single colon, `:`." + ], + "metadata": { + "id": "q08mizMUJ9Mn" + } + }, + { + "cell_type": "markdown", + "source": [ + "***Try using slicing to get the temperatures for the first half of the year for New York:***" + ], + "metadata": { + "id": "vFO3sKq0LZtj" + } + }, + { + "cell_type": "code", + "source": [ + "# Write your code below\n", + "temp[0,0:6]" + ], + "metadata": { + "id": "N_iFNlNELfuN", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "4f91e2b9-4f98-4b03-a7a3-7b5c46908318" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array([30.3, 32. , 39.4, 50.8, 60.9, 70.3])" + ] + }, + "metadata": {}, + "execution_count": 120 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "***Next, try using slicing to obtain the average temperatures for both cities in August. 
Which city is warmer?***" + ], + "metadata": { + "id": "DRvUqWbrLCeo" + } + }, + { + "cell_type": "code", + "source": [ + "# Write your code below\n", + "temp[:,7]" + ], + "metadata": { + "id": "qDGhe5fuLkjj", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "98624c28-7730-43de-f8b7-432b87b7972a" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array([74.5, 66.7])" + ] + }, + "metadata": {}, + "execution_count": 121 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "***Finally, use slicing and mathematical operations to calculate the average temperature for Seattle from June to August (three months). You got this!***" + ], + "metadata": { + "id": "QJ6ZQMAbL0a5" + } + }, + { + "cell_type": "code", + "source": [ + "# Write your code below\n", + "average_temp = (temp[1,5] + temp[1,6] + temp[1,7]) / 3\n", + "print(average_temp)" + ], + "metadata": { + "id": "HB_tMQP_MAFP", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "4b581768-2eb5-44ea-a875-2185408e797f" + }, + "execution_count": 4, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "64.36666666666667\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "## Part 2: Using functions" + ], + "metadata": { + "id": "zyTGovhuH1_p" + } + }, + { + "cell_type": "markdown", + "source": [ + "You've already learned at least three functions: `print()`, `np.array()`, and `len()`. Functions usually take one or more input **arguments** inside the parentheses, with multiple arguments separated by commas. Then the function can output, or \"**return**\", something back.\n", + "\n", + "Sometimes a function will do something without returning anything. For example, `print()` doesn't return anything — it just prints the input to the screen.\n", + "\n", + "Let's learn a few other functions..." 
+ ], + "metadata": { + "id": "yyBf1Z4xYlcY" + } + }, + { + "cell_type": "markdown", + "source": [ + "The NumPy function **`np.arange(START, END, INTERVAL)`** creates an array of numbers from START to END with a certain INTERVAL between each number.\n", + "\n", + "***Can you guess what the result of the code below will be before running it?***" + ], + "metadata": { + "id": "ccT1EH-aLxgb" + } + }, + { + "cell_type": "code", + "source": [ + "np.arange(0,40,5)" + ], + "metadata": { + "id": "gEV7V5IXZXiD", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "9e4e538f-aeda-4f01-e713-4aa98558a599" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array([ 0, 5, 10, 15, 20, 25, 30, 35])" + ] + }, + "metadata": {}, + "execution_count": 150 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "Note that **`np.arange(END)`** is a shorter way of writing **`np.arange(0,END,1)`**:" + ], + "metadata": { + "id": "MlBxrW2iSEQr" + } + }, + { + "cell_type": "code", + "source": [ + "print(np.arange(10))\n", + "print(np.arange(0,10,1))" + ], + "metadata": { + "id": "Ts_1PauNSMNR", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "84f28aae-8284-492f-99b2-5f33d669f640" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[0 1 2 3 4 5 6 7 8 9]\n", + "[0 1 2 3 4 5 6 7 8 9]\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "Additionally, the NumPy package has many useful functions for mathematical operations. You can find them on the NumPy website: https://numpy.org/doc/stable/reference/routines.html. 
Here are a few of them:\n", + "\n", + "* **`np.mean(INPUT)`** calculates the average value of the elements in an `INPUT` list or NumPy\n", + "array\n", + "* **`np.sum(INPUT)`** calculates the sum of the elements in an `INPUT` list or array\n", + "* **`np.max(INPUT)`** and **`np.min(INPUT)`** find the maximum or minimum values in `INPUT`\n", + "* **`np.ones(N)`** creates a new array of length `N` filled with the value `1.0` (a float by default)\n", + "* **`np.zeros(N)`** creates a new array of length `N` filled with the value `0.0` (a float by default)\n", + "\n", + "For example:" + ], + "metadata": { + "id": "j7r_aHOBKj63" + } + }, + { + "cell_type": "code", + "source": [ + "# We can do some math on the following array:\n", + "test = np.array([1,2,3])\n", + "print(np.mean(test))\n", + "print(np.sum(test))\n", + "print(np.max(test))\n", + "\n", + "# Create new arrays:\n", + "print(np.ones(5))\n", + "print(np.zeros(5))" + ], + "metadata": { + "id": "57G_kWHPLOis", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "1fda1a1f-fcfa-4eec-87c1-3e291fdbd476" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "2.0\n", + "6\n", + "3\n", + "[1. 1. 1. 1. 1.]\n", + "[0. 0. 0. 0. 0.]\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "Many functions can be **called** (applied) to a variable in two different ways. 
For example:" + ], + "metadata": { + "id": "SDu-P969RI9_" + } + }, + { + "cell_type": "code", + "source": [ + "print(np.mean(test)) # Option 1" + ], + "metadata": { + "id": "HQEVUCGIROpg", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "1ea9e670-8f5d-4dc2-b597-e6286bab034c" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "2.0\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "print(test.mean()) # Option 2 (same result!)" + ], + "metadata": { + "id": "PSI8XXGTRRho", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "e4267d13-3da7-4f90-d8f5-09b19e6cb1c1" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "2.0\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "To learn more about a function, you can always consult its online **documentation**! A package's documentation website usually has a page for each function describing its arguments, outputs, and examples of how to use it. [Here](https://numpy.org/doc/2.3/) is NumPy's documentation page, including a user guide.\n", + "\n", + "***Google \"numpy mean\" to find the documentation page for that function. How is the webpage structured, and what information does it tell us about the arguments needed to apply `np.mean()` to 2-D arrays?***" + ], + "metadata": { + "id": "Sds1U_tEE1vr" + } + }, + { + "cell_type": "markdown", + "source": [ + "Now that you've discovered named arguments... 
***use `np.mean()` to calculate and print the average annual (yearly) temperatures in New York and Seattle using the variable `temp` from earlier:***" + ], + "metadata": { + "id": "DS5_V9VfH4nQ" + } + }, + { + "cell_type": "code", + "source": [ + "# Write your code here:\n", + "np.mean(temp,axis=1)" + ], + "metadata": { + "id": "A1sFRubGNqvb", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "2581e2bc-9bec-4801-a803-8d8f2fdaefd8" + }, + "execution_count": 5, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array([53.43333333, 51.48333333])" + ] + }, + "metadata": {}, + "execution_count": 5 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "As a shortcut, you can access a function's documentation within Google Colab by typing a `?` mark followed by the function name without parentheses. ***Try it!***" + ], + "metadata": { + "id": "ZQmIKZK4IEV_" + } + }, + { + "cell_type": "code", + "source": [ + "?np.mean" + ], + "metadata": { + "id": "-b1ypjcqITBg" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "Google is a powerful way to discover functions that accomplish a task you need to do. Can you try the following?\n", + "1. ***Using Google, find a function that calculates the standard deviation of a NumPy array.***\n", + "2. 
***Now, use that function to calculate and print the standard deviation of Seattle's monthly temperatures below.***" + ], + "metadata": { + "id": "0Y1_tk-VIjLj" + } + }, + { + "cell_type": "code", + "source": [ + "# Write your code here:\n", + "print(f'The standard deviation is {np.std(temp[1,:])}°F')\n", + "\n", + "# Note that we can format the number to two decimal places by adding \":.2f\" after the function\n", + "print(f'The standard deviation is {np.std(temp[1,:]):.2f}°F')" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "B3Jqxq1qI1E7", + "outputId": "acbaaee9-84c3-44bc-bebe-cb095b050efc" + }, + "execution_count": 6, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "The standard deviation is 9.661765999144372°F\n", + "The standard deviation is 9.66°F\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "## Part 3: Missing data" + ], + "metadata": { + "id": "ESibLGhhRp8n" + } + }, + { + "cell_type": "markdown", + "source": [ + "In the real world, you'll frequently encounter missing data in an array.\n", + "\n", + "Missing data is represented by the float **`np.nan`** (the older alias `np.NaN` was removed in NumPy 2.0, so always use the lowercase spelling). NaN stands for \"Not a Number\".\n", + "\n", + "Note that NaN values are different from `None`, which is a specific object in Python that can be used as a placeholder value." 
+ ], + "metadata": { + "id": "ArvW0A6qmKQ0" + } + }, + { + "cell_type": "code", + "source": [ + "pH_measurements = np.array([7.84, 7.91, 8.05, np.nan, 7.96, 8.03])\n", + "\n", + "print(pH_measurements)" + ], + "metadata": { + "id": "SZn9Cq_7mbcz", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "9a2bb199-2333-40ac-bae3-0e3eb0abe949" + }, + "execution_count": 14, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[7.84 7.91 8.05 nan 7.96 8.03]\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "We can test for missing values using the function **`np.isnan()`**, which returns a boolean (or a boolean array when applied to an array):" + ], + "metadata": { + "id": "sqSHEDzWnNmH" + } + }, + { + "cell_type": "code", + "source": [ + "print(np.isnan(5))" + ], + "metadata": { + "id": "sfF17YZ3nTcF", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "9c050fde-504f-4473-80e5-370a244990c5" + }, + "execution_count": 16, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "False\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "print(np.isnan(np.nan))" + ], + "metadata": { + "id": "h-bbsucSnVDv", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "a697f687-d6ac-40de-b061-f2ebd7791cce" + }, + "execution_count": 17, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "True\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "print(np.isnan(pH_measurements))" + ], + "metadata": { + "id": "WzRxDMMWnMSQ", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "09c6b2fc-8ab3-4b38-9b76-e5cf6fcd2b31" + }, + "execution_count": 18, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[False False False True False False]\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "It's good to be aware that missing data can cause functions like 
`np.mean()` to return `nan` instead of a meaningful result:" + ], + "metadata": { + "id": "ljnxDSL-nyup" + } + }, + { + "cell_type": "code", + "source": [ + "print(np.mean(pH_measurements))" + ], + "metadata": { + "id": "JF6HxqP3n50y", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "97e058e2-64ba-42ed-84c5-30eb1603685e" + }, + "execution_count": 22, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "nan\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "Many functions have a \"NaN-safe\" version that ignores missing values and still calculates the result, such as **`np.nanmean()`**:" + ], + "metadata": { + "id": "9AiNIclLoANX" + } + }, + { + "cell_type": "code", + "source": [ + "print(np.nanmean(pH_measurements))" + ], + "metadata": { + "id": "DElGwXISoIQd", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "2ec32663-327b-4336-8b57-474c685687cb" + }, + "execution_count": 23, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "7.958\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "## Part 4: Loading tabular data" + ], + "metadata": { + "id": "tYzchKpM7DlA" + } + }, + { + "cell_type": "markdown", + "source": [ + "Up until now, we've been using data that we've typed directly into Python. However, most real-world data is stored in files that we'd like to open using Python.\n", + "\n", + "The most common type of data file is a **spreadsheet**, which has rows and columns. Generally, the columns will have column labels. This type of 1-D or 2-D data is also called **tabular data** because you can store it in a table.\n", + "\n", + "Sometimes there is only one column of data, such as a **time series** of, say, date vs. sea surface temperature.\n", + "\n", + "Tabular data is often stored in **comma-separated value (CSV)** format, with the file extension being `.csv`. Data files in this format can be opened using Microsoft Excel or Google Sheets, as well as Python. 
Other times it is stored in Microsoft Excel's `.xlsx` format." + ], + "metadata": { + "id": "m1oUdBof9lLK" + } + }, + { + "cell_type": "markdown", + "source": [ + "In Python, we use the `pandas` package to work with tabular data. Remember that we imported the package earlier using:\n", + "\n", + "> `import pandas as pd`\n", + "\n", + "Just like NumPy has the `array` object, Pandas has two types of objects: `Series` and `DataFrame`. This is what they look like:\n", + "" + ], + "metadata": { + "id": "1BPxKNjg7SZq" + } + }, + { + "cell_type": "markdown", + "source": [ + "For now, we'll just be applying simple operations to read spreadsheet data using `pandas`. But if you would like to learn more, check out these [lesson slides](https://ethan-campbell.github.io/OCEAN_215/materials/lessons/lesson_9.pdf) or these lesson videos ([Part 1](https://www.youtube.com/watch?v=ev_qX-czUKs), [Part 2](https://www.youtube.com/watch?v=56ZRM7u06nM)). You can find the `pandas` user guide [here](https://pandas.pydata.org/)." + ], + "metadata": { + "id": "HQr4nB64_8p0" + } + }, + { + "cell_type": "markdown", + "source": [ + "\n", + "\n", + "*Image source: [UW](https://www.ocean.washington.edu/story/RV_Rachel_Carson)*" + ], + "metadata": { + "id": "Btg6WMqyK0sK" + } + }, + { + "cell_type": "markdown", + "source": [ + "First, let's download two `.csv` data files from Google Drive here: https://drive.google.com/drive/folders/1Am6XdlB-APQ3ccOvLeGK8DFPQ2OnPeJD?usp=share_link. Each file is a conductivity-temperature-depth (CTD) cast that was collected from the ship R/V *Rachel Carson* off of Carkeek Park near Seattle. ***Save these two files to your computer.***\n", + "\n", + "Next, we can upload the files to this Google Colab notebook. 
***Click the sidebar folder icon on the left, then use the page-with-arrow icon at the top to select the files and upload them.*** Note that uploaded files will be deleted from Google Colab when you refresh this notebook!\n", + "\n", + "We will specify each **filepath** using string variables:" + ], + "metadata": { + "id": "czGyp7MTAc5T" + } + }, + { + "cell_type": "code", + "source": [ + "filepath_0 = '/content/2023051001001_Carkeek.csv'\n", + "filepath_1 = '/content/2023051101001_Carkeek.csv'" + ], + "metadata": { + "id": "gnrD640dB5ds" + }, + "execution_count": 8, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "Now, we can load the files using `pandas`:\n", + "\n", + "> **`pd.read_csv(FILEPATH, ARGUMENTS...)`**\n", + "\n", + "This function is very customizable using the many optional `ARGUMENTS`, which allow it to handle almost any file. You can find documentation about the `pd.read_csv()` arguments [at this link](https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html).\n", + "\n", + "***Let's first take a look at the data file using a simple text editor. Notice the long header. 
What argument can we use to exclude the header from being loaded?***\n", + "\n", + "Below, we'll load each data file using ``pd.read_csv()`` and store each file into a new variable.\n", + "\n", + "We can look at the data using **`display()`** (which is a fancy version of `print()` for `DataFrame` objects):" + ], + "metadata": { + "id": "XaUCH7ikB6Sy" + } + }, + { + "cell_type": "code", + "source": [ + "data_0 = pd.read_csv(filepath_0,comment='#')\n", + "data_1 = pd.read_csv(filepath_1,comment='#')\n", + "\n", + "display(data_0)" + ], + "metadata": { + "id": "4boQwvSg7R5J", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 449 + }, + "outputId": "504f3da0-0c2b-456f-90c0-f7f9eaf67059" + }, + "execution_count": 9, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + " Unnamed: 0 index altM CStarTr0 c0mS/cm density00 depSM \\\n", + "0 0 3407 98.53 71.0825 31.662958 1021.7317 2.101 \n", + "1 1 3408 98.53 71.0825 31.662061 1021.7317 2.005 \n", + "2 2 3409 98.53 71.0825 31.661464 1021.7323 2.045 \n", + "3 3 3410 98.53 71.0825 31.660448 1021.7323 2.005 \n", + "4 4 3411 98.53 71.0825 31.658416 1021.7325 1.981 \n", + "... ... ... ... ... ... ... ... \n", + "8200 8200 11607 11.99 83.1087 31.920640 1024.1134 173.726 \n", + "8201 8201 11608 11.99 83.1087 31.920640 1024.1135 173.726 \n", + "8202 8202 11609 11.99 83.1087 31.920820 1024.1141 173.846 \n", + "8203 8203 11610 11.99 83.1087 31.920579 1024.1129 173.613 \n", + "8204 8204 11611 11.99 83.1087 31.920340 1024.1135 173.846 \n", + "\n", + " latitude longitude flECO-AFL ... sbeox0Mg/L sbeox0ML/L ph \\\n", + "0 47.71418 -122.40854 2.8127 ... 10.6450 7.4488 9.271 \n", + "1 47.71418 -122.40854 2.8127 ... 10.6446 7.4484 9.271 \n", + "2 47.71418 -122.40854 2.8127 ... 10.6443 7.4483 9.271 \n", + "3 47.71418 -122.40854 2.8713 ... 10.6441 7.4481 9.271 \n", + "4 47.71418 -122.40854 3.1057 ... 10.6443 7.4483 9.271 \n", + "... ... ... ... ... ... ... ... 
\n", + "8200 47.71316 -122.40812 0.1753 ... 7.0198 4.9120 8.788 \n", + "8201 47.71316 -122.40812 0.1753 ... 7.0201 4.9123 8.788 \n", + "8202 47.71316 -122.40812 0.1753 ... 7.0204 4.9125 8.788 \n", + "8203 47.71316 -122.40812 0.1753 ... 7.0205 4.9125 8.783 \n", + "8204 47.71316 -122.40812 0.1753 ... 7.0209 4.9128 8.788 \n", + "\n", + " potemp090C prDM sal00 t090C scan nbf flag \n", + "0 10.2155 2.119 28.3385 10.2157 3408 0 0.0 \n", + "1 10.2140 2.022 28.3388 10.2143 3409 0 0.0 \n", + "2 10.2129 2.062 28.3391 10.2131 3410 0 0.0 \n", + "3 10.2117 2.022 28.3390 10.2119 3411 0 0.0 \n", + "4 10.2093 1.998 28.3389 10.2095 3412 0 0.0 \n", + "... ... ... ... ... ... ... ... \n", + "8200 8.3719 175.266 30.0190 8.3887 11608 0 0.0 \n", + "8201 8.3717 175.266 30.0191 8.3886 11609 0 0.0 \n", + "8202 8.3718 175.387 30.0191 8.3887 11610 0 0.0 \n", + "8203 8.3719 175.152 30.0190 8.3887 11611 0 0.0 \n", + "8204 8.3720 175.387 30.0184 8.3889 11612 0 0.0 \n", + "\n", + "[8205 rows x 21 columns]" + ], + "text/html": [ + "\n", + "
| \n", + " | Unnamed: 0 | \n", + "index | \n", + "altM | \n", + "CStarTr0 | \n", + "c0mS/cm | \n", + "density00 | \n", + "depSM | \n", + "latitude | \n", + "longitude | \n", + "flECO-AFL | \n", + "... | \n", + "sbeox0Mg/L | \n", + "sbeox0ML/L | \n", + "ph | \n", + "potemp090C | \n", + "prDM | \n", + "sal00 | \n", + "t090C | \n", + "scan | \n", + "nbf | \n", + "flag | \n", + "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", + "0 | \n", + "3407 | \n", + "98.53 | \n", + "71.0825 | \n", + "31.662958 | \n", + "1021.7317 | \n", + "2.101 | \n", + "47.71418 | \n", + "-122.40854 | \n", + "2.8127 | \n", + "... | \n", + "10.6450 | \n", + "7.4488 | \n", + "9.271 | \n", + "10.2155 | \n", + "2.119 | \n", + "28.3385 | \n", + "10.2157 | \n", + "3408 | \n", + "0 | \n", + "0.0 | \n", + "
| 1 | \n", + "1 | \n", + "3408 | \n", + "98.53 | \n", + "71.0825 | \n", + "31.662061 | \n", + "1021.7317 | \n", + "2.005 | \n", + "47.71418 | \n", + "-122.40854 | \n", + "2.8127 | \n", + "... | \n", + "10.6446 | \n", + "7.4484 | \n", + "9.271 | \n", + "10.2140 | \n", + "2.022 | \n", + "28.3388 | \n", + "10.2143 | \n", + "3409 | \n", + "0 | \n", + "0.0 | \n", + "
| 2 | \n", + "2 | \n", + "3409 | \n", + "98.53 | \n", + "71.0825 | \n", + "31.661464 | \n", + "1021.7323 | \n", + "2.045 | \n", + "47.71418 | \n", + "-122.40854 | \n", + "2.8127 | \n", + "... | \n", + "10.6443 | \n", + "7.4483 | \n", + "9.271 | \n", + "10.2129 | \n", + "2.062 | \n", + "28.3391 | \n", + "10.2131 | \n", + "3410 | \n", + "0 | \n", + "0.0 | \n", + "
| 3 | \n", + "3 | \n", + "3410 | \n", + "98.53 | \n", + "71.0825 | \n", + "31.660448 | \n", + "1021.7323 | \n", + "2.005 | \n", + "47.71418 | \n", + "-122.40854 | \n", + "2.8713 | \n", + "... | \n", + "10.6441 | \n", + "7.4481 | \n", + "9.271 | \n", + "10.2117 | \n", + "2.022 | \n", + "28.3390 | \n", + "10.2119 | \n", + "3411 | \n", + "0 | \n", + "0.0 | \n", + "
| 4 | \n", + "4 | \n", + "3411 | \n", + "98.53 | \n", + "71.0825 | \n", + "31.658416 | \n", + "1021.7325 | \n", + "1.981 | \n", + "47.71418 | \n", + "-122.40854 | \n", + "3.1057 | \n", + "... | \n", + "10.6443 | \n", + "7.4483 | \n", + "9.271 | \n", + "10.2093 | \n", + "1.998 | \n", + "28.3389 | \n", + "10.2095 | \n", + "3412 | \n", + "0 | \n", + "0.0 | \n", + "
| ... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
| 8200 | \n", + "8200 | \n", + "11607 | \n", + "11.99 | \n", + "83.1087 | \n", + "31.920640 | \n", + "1024.1134 | \n", + "173.726 | \n", + "47.71316 | \n", + "-122.40812 | \n", + "0.1753 | \n", + "... | \n", + "7.0198 | \n", + "4.9120 | \n", + "8.788 | \n", + "8.3719 | \n", + "175.266 | \n", + "30.0190 | \n", + "8.3887 | \n", + "11608 | \n", + "0 | \n", + "0.0 | \n", + "
| 8201 | \n", + "8201 | \n", + "11608 | \n", + "11.99 | \n", + "83.1087 | \n", + "31.920640 | \n", + "1024.1135 | \n", + "173.726 | \n", + "47.71316 | \n", + "-122.40812 | \n", + "0.1753 | \n", + "... | \n", + "7.0201 | \n", + "4.9123 | \n", + "8.788 | \n", + "8.3717 | \n", + "175.266 | \n", + "30.0191 | \n", + "8.3886 | \n", + "11609 | \n", + "0 | \n", + "0.0 | \n", + "
| 8202 | \n", + "8202 | \n", + "11609 | \n", + "11.99 | \n", + "83.1087 | \n", + "31.920820 | \n", + "1024.1141 | \n", + "173.846 | \n", + "47.71316 | \n", + "-122.40812 | \n", + "0.1753 | \n", + "... | \n", + "7.0204 | \n", + "4.9125 | \n", + "8.788 | \n", + "8.3718 | \n", + "175.387 | \n", + "30.0191 | \n", + "8.3887 | \n", + "11610 | \n", + "0 | \n", + "0.0 | \n", + "
| 8203 | \n", + "8203 | \n", + "11610 | \n", + "11.99 | \n", + "83.1087 | \n", + "31.920579 | \n", + "1024.1129 | \n", + "173.613 | \n", + "47.71316 | \n", + "-122.40812 | \n", + "0.1753 | \n", + "... | \n", + "7.0205 | \n", + "4.9125 | \n", + "8.783 | \n", + "8.3719 | \n", + "175.152 | \n", + "30.0190 | \n", + "8.3887 | \n", + "11611 | \n", + "0 | \n", + "0.0 | \n", + "
| 8204 | \n", + "8204 | \n", + "11611 | \n", + "11.99 | \n", + "83.1087 | \n", + "31.920340 | \n", + "1024.1135 | \n", + "173.846 | \n", + "47.71316 | \n", + "-122.40812 | \n", + "0.1753 | \n", + "... | \n", + "7.0209 | \n", + "4.9128 | \n", + "8.788 | \n", + "8.3720 | \n", + "175.387 | \n", + "30.0184 | \n", + "8.3889 | \n", + "11612 | \n", + "0 | \n", + "0.0 | \n", + "
8205 rows × 21 columns
\n", + "| \n", + " | density00 | \n", + "
|---|---|
| 0 | \n", + "1021.7317 | \n", + "
| 1 | \n", + "1021.7317 | \n", + "
| 2 | \n", + "1021.7323 | \n", + "
| 3 | \n", + "1021.7323 | \n", + "
| 4 | \n", + "1021.7325 | \n", + "
| ... | \n", + "... | \n", + "
| 8200 | \n", + "1024.1134 | \n", + "
| 8201 | \n", + "1024.1135 | \n", + "
| 8202 | \n", + "1024.1141 | \n", + "
| 8203 | \n", + "1024.1129 | \n", + "
| 8204 | \n", + "1024.1135 | \n", + "
8205 rows × 1 columns
\n", + "