A dummy water quality data set, with deliberately inconsistent variable names and units, used to illustrate the data cleaning functions.

dummy

Format

A data frame with 4 columns:

Date

The date of the reading.

Variable

The name of the variable.

Value

The value of the reading.

Units

The units of the value.

See also

Examples

demo(dummy)
#> 
#> 
#> 	demo(dummy)
#> 	---- ~~~~~
#> 
#> > # Copyright 2015 Province of British Columbia
#> > #
#> > # Licensed under the Apache License, Version 2.0 (the "License");
#> > # you may not use this file except in compliance with the License.
#> > # You may obtain a copy of the License at
#> > #
#> > # http://www.apache.org/licenses/LICENSE-2.0
#> > #
#> > # Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS,
#> > # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#> > # See the License for the specific language governing permissions and limitations under the License.
#> > 
#> > library(wqbc)
#> 
#> > options(wqbc.messages = TRUE)
#> 
#> > data(dummy)
#> 
#> > print(dummy)
#>          Date            Variable  Value         Units
#> 1  2000-01-01 Aluminium Dissolved   20.0          mg/L
#> 2  2000-01-01 Aluminium Dissolved    1.0          mg/L
#> 3  2000-01-01 Aluminium Dissolved    2.0          mg/L
#> 4  2000-01-01                  pH    8.0      PH UNITS
#> 5  2000-01-01                  pH    9.5      PH UNITS
#> 6  2000-01-02  DISSOLVED ALUMINUM    1.0          MG/L
#> 7  2000-01-02  DISSOLVED ALUMINUM 2000.0          uG/L
#> 8  2000-01-04 Aluminium Dissolved 2000.0          ug/L
#> 9  2000-01-04 Aluminium Dissolved    1.0          mg/L
#> 10 2000-01-05 Aluminium Dissolved    1.0          mg/L
#> 11 2000-01-05 Aluminium Dissolved   40.0          mg/L
#> 12 2000-01-05 Aluminium Dissolved     NA          mg/L
#> 13 2000-01-06 Aluminium Dissolved   10.0          mg/L
#> 14 2000-01-06 Aluminium Dissolved   20.0          mg/L
#> 15 2000-01-06 Aluminium Dissolved     NA          mg/L
#> 16 1977-05-25          Zinc Total    1.0          ug/L
#> 17 1977-05-25          Zinc Total 1000.0 midichlorians
#> 18 1978-12-01          Kryptonite    1.0          ug/L
#> 19 1978-12-01                  pH    7.0      PH UNITS
#> 
#> > dummy$Units <- substitute_units(dummy$Units)
#> Substituted 'MG/L' with 'mg/L', 'PH UNITS' with 'pH' and 'uG/L' with 'ug/L'.
#> Failed to substitute 'midichlorians'.
#> 
#> > dummy$Variable <- substitute_variables(dummy$Variable)
#> Substituted 'DISSOLVED ALUMINUM' with 'Aluminum Dissolved'.
#> Failed to substitute 'Aluminium Dissolved' and 'Kryptonite'.
#> 
#> > print(dummy)
#>          Date           Variable  Value Units
#> 1  2000-01-01               <NA>   20.0  mg/L
#> 2  2000-01-01               <NA>    1.0  mg/L
#> 3  2000-01-01               <NA>    2.0  mg/L
#> 4  2000-01-01                 pH    8.0    pH
#> 5  2000-01-01                 pH    9.5    pH
#> 6  2000-01-02 Aluminum Dissolved    1.0  mg/L
#> 7  2000-01-02 Aluminum Dissolved 2000.0  ug/L
#> 8  2000-01-04               <NA> 2000.0  ug/L
#> 9  2000-01-04               <NA>    1.0  mg/L
#> 10 2000-01-05               <NA>    1.0  mg/L
#> 11 2000-01-05               <NA>   40.0  mg/L
#> 12 2000-01-05               <NA>     NA  mg/L
#> 13 2000-01-06               <NA>   10.0  mg/L
#> 14 2000-01-06               <NA>   20.0  mg/L
#> 15 2000-01-06               <NA>     NA  mg/L
#> 16 1977-05-25         Zinc Total    1.0  ug/L
#> 17 1977-05-25         Zinc Total 1000.0  <NA>
#> 18 1978-12-01               <NA>    1.0  ug/L
#> 19 1978-12-01                 pH    7.0    pH
#> 
#> > dummy <- dplyr::filter(dummy, Units %in% lookup_units() & Variable %in% lookup_variables())
#> 
#> > print(dummy)
#>         Date           Variable  Value Units
#> 1 2000-01-01                 pH    8.0    pH
#> 2 2000-01-01                 pH    9.5    pH
#> 3 2000-01-02 Aluminum Dissolved    1.0  mg/L
#> 4 2000-01-02 Aluminum Dissolved 2000.0  ug/L
#> 5 1977-05-25         Zinc Total    1.0  ug/L
#> 6 1978-12-01                 pH    7.0    pH
#> 
#> > dummy <- standardize_wqdata(dummy)
#> Standardizing water quality data...
#> Standardized water quality data.
#> 
#> > print(dummy)
#> # A tibble: 6 × 4
#>   Date       Variable           Value Units
#>   <date>     <chr>              <dbl> <chr>
#> 1 2000-01-02 Aluminum Dissolved   1   mg/L 
#> 2 2000-01-02 Aluminum Dissolved   2   mg/L 
#> 3 1977-05-25 Zinc Total           1   ug/L 
#> 4 2000-01-01 pH                   8   pH   
#> 5 2000-01-01 pH                   9.5 pH   
#> 6 1978-12-01 pH                   7   pH   
#> 
#> > dummy <- clean_wqdata(dummy)
#> Cleaning water quality data...
#> Identified 0 outliers in water quality data.
#> Cleansed water quality data.
#> 
#> > print(dummy)
#> # A tibble: 4 × 6
#>   Date       Variable           Value Units Outlier DetectionLimit
#>   <date>     <chr>              <dbl> <chr> <lgl>            <dbl>
#> 1 2000-01-02 Aluminum Dissolved  1.5  mg/L  FALSE               NA
#> 2 1977-05-25 Zinc Total          1    ug/L  FALSE               NA
#> 3 1978-12-01 pH                  7    pH    FALSE               NA
#> 4 2000-01-01 pH                  8.75 pH    FALSE               NA
#> 
#> > dummy <- calc_limits(dummy, term = "short")
#> Cleaning water quality data...
#> Identified 0 outliers in water quality data.
#> Cleansed water quality data.
#> Standardizing water quality data...
#> Standardized water quality data.
#> Calculating short-term water quality limits...
#> Dropped 1 values for Zinc Total without limits
#> Dropped 2 values for pH without limits
#> Calculated short-term water quality limits.
#> 
#> > print(dummy)
#> # A tibble: 1 × 7
#>   Date       Variable           Value UpperLimit Units Outlier DetectionLimit
#>   <date>     <chr>              <dbl>      <dbl> <chr> <lgl>            <dbl>
#> 1 2000-01-02 Aluminum Dissolved   1.5        0.1 mg/L  NA                  NA