library(tidyverse)
library(lubridate){
dt1 <- as_date("2020-4-1") %>% as.numeric()
dt2 <- as_date("2020-6-1") %>% as.numeric()
set.seed(123466)
gen_single_id <- function(id){
num_med <- sample(1:3,1)
tibble(med = sample(LETTERS[1:3],num_med,replace = TRUE),
start = sample(dt1:dt2,num_med, replace=TRUE),
end = start + sample(1:100, num_med, replace=TRUE))
}
dat <- tibble(id = 1:200) %>%
mutate(ddd = map(id, gen_single_id))
dat <- dat %>%
unnest(c(ddd)) %>%
arrange(id, med) %>%
mutate(across(c(start,end), ~{as.character(as_date(.))}))
write_csv(dat,"data/time.csv")
}それでは、引き続きスライドで解説した、時間に関する集計についてデータを読み込んで処理を行うことを練習してみましょう。
dat <- read_csv("data/time.csv")Rows: 400 Columns: 4
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (1): med
dbl (1): id
date (2): start, end
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
dat# A tibble: 400 × 4
id med start end
<dbl> <chr> <date> <date>
1 1 A 2020-04-14 2020-05-07
2 1 B 2020-04-01 2020-04-09
3 2 A 2020-05-12 2020-06-30
4 2 A 2020-05-24 2020-06-17
5 3 A 2020-05-31 2020-08-01
6 4 B 2020-04-03 2020-06-04
7 5 A 2020-04-26 2020-07-09
8 5 B 2020-04-15 2020-05-10
9 5 B 2020-04-15 2020-04-28
10 6 A 2020-05-16 2020-06-14
# ℹ 390 more rows
このデータも、私が作成した架空のデータです。idは対象者、medは処方された薬、startとendで処方開始(start)と終了(end)を記載してあります。
まず、このデータ全体を眺めてみましょう。
dat %>% summary() id med start end
Min. : 1.0 Length:400 Min. :2020-04-01 Min. :2020-04-09
1st Qu.: 51.0 Class :character 1st Qu.:2020-04-18 1st Qu.:2020-05-30
Median : 99.0 Mode :character Median :2020-05-05 Median :2020-06-23
Mean :100.9 Mean :2020-05-03 Mean :2020-06-23
3rd Qu.:150.0 3rd Qu.:2020-05-18 3rd Qu.:2020-07-20
Max. :200.0 Max. :2020-06-01 Max. :2020-09-06
summary関数を利用するとおおむねの情報が表示されます。
ただし、med変数については文字列であるとだけしか表示されていないので、因子型に変更しておきましょう
dat2 <- dat %>% mutate(med = as.factor(med))
dat2 %>% summary() id med start end
Min. : 1.0 A:151 Min. :2020-04-01 Min. :2020-04-09
1st Qu.: 51.0 B:132 1st Qu.:2020-04-18 1st Qu.:2020-05-30
Median : 99.0 C:117 Median :2020-05-05 Median :2020-06-23
Mean :100.9 Mean :2020-05-03 Mean :2020-06-23
3rd Qu.:150.0 3rd Qu.:2020-05-18 3rd Qu.:2020-07-20
Max. :200.0 Max. :2020-06-01 Max. :2020-09-06
これで、このデータは、idが1から200まで、200人分のデータで、このデータにでてくる薬の種類は、A、B、Cの3種類であることがわかります。
また、
dat2 %>% count(id, med)# A tibble: 326 × 3
id med n
<dbl> <fct> <int>
1 1 A 1
2 1 B 1
3 2 A 2
4 3 A 1
5 4 B 1
6 5 A 1
7 5 B 2
8 6 A 1
9 6 B 1
10 7 C 1
# ℹ 316 more rows
でさらっと見ると、nが2等も見当たるので、二回同じ薬が同じIDで処方されているデータになっていることもわかります。
countのこの形だとちょっと見ずらいので、base::table関数をこういう場合に利用すると便利です。
table(dat2$id, dat2$med) %>% head() #あるいは、
A B C
1 1 1 0
2 2 0 0
3 1 0 0
4 0 1 0
5 1 2 0
6 1 1 0
dat2 %>% {table(.$id, .$med)} %>% head()
A B C
1 1 1 0
2 2 0 0
3 1 0 0
4 0 1 0
5 1 2 0
6 1 1 0
{}でくくると、 DATA %>% {} として、{}の中の「.」がDATAと同じになるので、データの名前がめちゃくちゃ長い場合、
nanika_monosugoku_namae_ga_nagai_data <- dat2
table(
nanika_monosugoku_namae_ga_nagai_data$id,
nanika_monosugoku_namae_ga_nagai_data$med
) %>% head()
A B C
1 1 1 0
2 2 0 0
3 1 0 0
4 0 1 0
5 1 2 0
6 1 1 0
nanika_monosugoku_namae_ga_nagai_data %>%
{table(.$id, .$med)} %>%
head()
A B C
1 1 1 0
2 2 0 0
3 1 0 0
4 0 1 0
5 1 2 0
6 1 1 0
すっきりと書くことができます。
それではここからは、問題に答える形で解説をすすめていきます
問題:
薬AからC、それぞれの投与期間を日数で最小、平均、最大値を集計してみましょう尚、重複して投与されている場合は、
|—-A1—–| |—–A2—–|
このように期間がかぶっていない場合は、どちらか長い方を
採用、
|—-A1—-| |———–A2——|
このように期間がかぶっている場合は、1つの期間として集計する。
というルールでやってみてください。
次の動画では、どのように処理するかのイメージを解説していきます
set.seed(12345)
{ #
plot_dat <- function(dat, xrange=c(1,100)){
gt <- dat %>%
mutate(id = n():1) %>%
ggplot() +
geom_segment(aes(x = s, xend = e, y = id, yend = id)) +
geom_point(aes(x = s, y=id),color="orange") +
geom_point(aes(x = e, y=id),color="darkblue") +
theme_void()+
coord_cartesian(xlim=c(as_date(xrange[1]),as_date(xrange[2])))
gb <- dat %>%
ggplot() +
geom_segment(aes(x=s,xend=e,y=1,yend=1))+
geom_point(aes(x=s,y=1),color="orange") +
geom_point(aes(x=e,y=1),color="darkblue") +
theme_void()+
coord_cartesian(xlim=c(as_date(xrange[1]),as_date(xrange[2])))
cowplot::plot_grid(gt,gb,nrow=2,rel_heights = c(7,1))
}
dat <- tibble(
s = sample(1:100, 10, replace = TRUE),
e = s + sample(1:10, 10, replace = TRUE)
) %>%
mutate(s = as_date(s),
e = as_date(e))
dat <- tribble(
~s, ~e,
1, 2,
3, 5,
5, 6,
5, 8,
9, 11
) %>%
mutate(s=as_date(s), e=as_date(e))
plot_dat(dat, c(1,15))
dat %>%
arrange(s) %>%
mutate(interv = interval(s,e)) %>%
mutate(prev_overlap = int_overlaps(interv, lag(interv))) %>%
replace_na(list(prev_overlap=FALSE)) %>%
mutate(presc_id = cumsum(!prev_overlap)) %>%
group_by(presc_id) %>%
summarise(s = min(s), e = max(e))
dat2 <- dat %>%
arrange(s) %>%
mutate(interv = interval(s,e)) %>%
mutate(post_overlap = int_overlaps(interv, lead(interv))) %>%
mutate(new_e = if_else(post_overlap, lead(e), e, e)) %>%
mutate(e = if_else(post_overlap, new_e, e, e))
View(dat2)
plot_dat(dat2)
check_overlap <- function(.data){
temp <- .data %>%
mutate(new_interval = interval(s,e,tzone = "UTC"))
temp <- temp %>%
mutate(
new_int_next = int_overlaps(new_interval, lead(new_interval))
)
any(temp$new_int_next, na.rm=TRUE)
}
run_test <- function(x){
print(x)
set.seed(x)
dat <- tibble(
s = sample(1:100, 10, replace = TRUE),
e = s + sample(1:10, 10, replace = TRUE)
) %>%
mutate(s = as_date(s),
e = as_date(e))
dat2 <- dat %>%
arrange(s) %>%
mutate(interv = interval(s,e)) %>%
mutate(
prev_overlap = int_overlaps(lag(interv),interv),
post_overlap = int_overlaps(interv, lead(interv))
) %>%
mutate(
new_e = if_else(post_overlap, lead(e), e)
) %>%
mutate(e = new_e) %>%
filter(!post_overlap)
return(check_overlap(dat2))
}
res1 <- map_lgl(100:1000, run_test)
}[1] 100
[1] 101
[1] 102
[1] 103
[1] 104
[1] 105
[1] 106
[1] 107
[1] 108
[1] 109
[1] 110
[1] 111
[1] 112
[1] 113
[1] 114
[1] 115
[1] 116
[1] 117
[1] 118
[1] 119
[1] 120
[1] 121
[1] 122
[1] 123
[1] 124
[1] 125
[1] 126
[1] 127
[1] 128
[1] 129
[1] 130
[1] 131
[1] 132
[1] 133
[1] 134
[1] 135
[1] 136
[1] 137
[1] 138
[1] 139
[1] 140
[1] 141
[1] 142
[1] 143
[1] 144
[1] 145
[1] 146
[1] 147
[1] 148
[1] 149
[1] 150
[1] 151
[1] 152
[1] 153
[1] 154
[1] 155
[1] 156
[1] 157
[1] 158
[1] 159
[1] 160
[1] 161
[1] 162
[1] 163
[1] 164
[1] 165
[1] 166
[1] 167
[1] 168
[1] 169
[1] 170
[1] 171
[1] 172
[1] 173
[1] 174
[1] 175
[1] 176
[1] 177
[1] 178
[1] 179
[1] 180
[1] 181
[1] 182
[1] 183
[1] 184
[1] 185
[1] 186
[1] 187
[1] 188
[1] 189
[1] 190
[1] 191
[1] 192
[1] 193
[1] 194
[1] 195
[1] 196
[1] 197
[1] 198
[1] 199
[1] 200
[1] 201
[1] 202
[1] 203
[1] 204
[1] 205
[1] 206
[1] 207
[1] 208
[1] 209
[1] 210
[1] 211
[1] 212
[1] 213
[1] 214
[1] 215
[1] 216
[1] 217
[1] 218
[1] 219
[1] 220
[1] 221
[1] 222
[1] 223
[1] 224
[1] 225
[1] 226
[1] 227
[1] 228
[1] 229
[1] 230
[1] 231
[1] 232
[1] 233
[1] 234
[1] 235
[1] 236
[1] 237
[1] 238
[1] 239
[1] 240
[1] 241
[1] 242
[1] 243
[1] 244
[1] 245
[1] 246
[1] 247
[1] 248
[1] 249
[1] 250
[1] 251
[1] 252
[1] 253
[1] 254
[1] 255
[1] 256
[1] 257
[1] 258
[1] 259
[1] 260
[1] 261
[1] 262
[1] 263
[1] 264
[1] 265
[1] 266
[1] 267
[1] 268
[1] 269
[1] 270
[1] 271
[1] 272
[1] 273
[1] 274
[1] 275
[1] 276
[1] 277
[1] 278
[1] 279
[1] 280
[1] 281
[1] 282
[1] 283
[1] 284
[1] 285
[1] 286
[1] 287
[1] 288
[1] 289
[1] 290
[1] 291
[1] 292
[1] 293
[1] 294
[1] 295
[1] 296
[1] 297
[1] 298
[1] 299
[1] 300
[1] 301
[1] 302
[1] 303
[1] 304
[1] 305
[1] 306
[1] 307
[1] 308
[1] 309
[1] 310
[1] 311
[1] 312
[1] 313
[1] 314
[1] 315
[1] 316
[1] 317
[1] 318
[1] 319
[1] 320
[1] 321
[1] 322
[1] 323
[1] 324
[1] 325
[1] 326
[1] 327
[1] 328
[1] 329
[1] 330
[1] 331
[1] 332
[1] 333
[1] 334
[1] 335
[1] 336
[1] 337
[1] 338
[1] 339
[1] 340
[1] 341
[1] 342
[1] 343
[1] 344
[1] 345
[1] 346
[1] 347
[1] 348
[1] 349
[1] 350
[1] 351
[1] 352
[1] 353
[1] 354
[1] 355
[1] 356
[1] 357
[1] 358
[1] 359
[1] 360
[1] 361
[1] 362
[1] 363
[1] 364
[1] 365
[1] 366
[1] 367
[1] 368
[1] 369
[1] 370
[1] 371
[1] 372
[1] 373
[1] 374
[1] 375
[1] 376
[1] 377
[1] 378
[1] 379
[1] 380
[1] 381
[1] 382
[1] 383
[1] 384
[1] 385
[1] 386
[1] 387
[1] 388
[1] 389
[1] 390
[1] 391
[1] 392
[1] 393
[1] 394
[1] 395
[1] 396
[1] 397
[1] 398
[1] 399
[1] 400
[1] 401
[1] 402
[1] 403
[1] 404
[1] 405
[1] 406
[1] 407
[1] 408
[1] 409
[1] 410
[1] 411
[1] 412
[1] 413
[1] 414
[1] 415
[1] 416
[1] 417
[1] 418
[1] 419
[1] 420
[1] 421
[1] 422
[1] 423
[1] 424
[1] 425
[1] 426
[1] 427
[1] 428
[1] 429
[1] 430
[1] 431
[1] 432
[1] 433
[1] 434
[1] 435
[1] 436
[1] 437
[1] 438
[1] 439
[1] 440
[1] 441
[1] 442
[1] 443
[1] 444
[1] 445
[1] 446
[1] 447
[1] 448
[1] 449
[1] 450
[1] 451
[1] 452
[1] 453
[1] 454
[1] 455
[1] 456
[1] 457
[1] 458
[1] 459
[1] 460
[1] 461
[1] 462
[1] 463
[1] 464
[1] 465
[1] 466
[1] 467
[1] 468
[1] 469
[1] 470
[1] 471
[1] 472
[1] 473
[1] 474
[1] 475
[1] 476
[1] 477
[1] 478
[1] 479
[1] 480
[1] 481
[1] 482
[1] 483
[1] 484
[1] 485
[1] 486
[1] 487
[1] 488
[1] 489
[1] 490
[1] 491
[1] 492
[1] 493
[1] 494
[1] 495
[1] 496
[1] 497
[1] 498
[1] 499
[1] 500
[1] 501
[1] 502
[1] 503
[1] 504
[1] 505
[1] 506
[1] 507
[1] 508
[1] 509
[1] 510
[1] 511
[1] 512
[1] 513
[1] 514
[1] 515
[1] 516
[1] 517
[1] 518
[1] 519
[1] 520
[1] 521
[1] 522
[1] 523
[1] 524
[1] 525
[1] 526
[1] 527
[1] 528
[1] 529
[1] 530
[1] 531
[1] 532
[1] 533
[1] 534
[1] 535
[1] 536
[1] 537
[1] 538
[1] 539
[1] 540
[1] 541
[1] 542
[1] 543
[1] 544
[1] 545
[1] 546
[1] 547
[1] 548
[1] 549
[1] 550
[1] 551
[1] 552
[1] 553
[1] 554
[1] 555
[1] 556
[1] 557
[1] 558
[1] 559
[1] 560
[1] 561
[1] 562
[1] 563
[1] 564
[1] 565
[1] 566
[1] 567
[1] 568
[1] 569
[1] 570
[1] 571
[1] 572
[1] 573
[1] 574
[1] 575
[1] 576
[1] 577
[1] 578
[1] 579
[1] 580
[1] 581
[1] 582
[1] 583
[1] 584
[1] 585
[1] 586
[1] 587
[1] 588
[1] 589
[1] 590
[1] 591
[1] 592
[1] 593
[1] 594
[1] 595
[1] 596
[1] 597
[1] 598
[1] 599
[1] 600
[1] 601
[1] 602
[1] 603
[1] 604
[1] 605
[1] 606
[1] 607
[1] 608
[1] 609
[1] 610
[1] 611
[1] 612
[1] 613
[1] 614
[1] 615
[1] 616
[1] 617
[1] 618
[1] 619
[1] 620
[1] 621
[1] 622
[1] 623
[1] 624
[1] 625
[1] 626
[1] 627
[1] 628
[1] 629
[1] 630
[1] 631
[1] 632
[1] 633
[1] 634
[1] 635
[1] 636
[1] 637
[1] 638
[1] 639
[1] 640
[1] 641
[1] 642
[1] 643
[1] 644
[1] 645
[1] 646
[1] 647
[1] 648
[1] 649
[1] 650
[1] 651
[1] 652
[1] 653
[1] 654
[1] 655
[1] 656
[1] 657
[1] 658
[1] 659
[1] 660
[1] 661
[1] 662
[1] 663
[1] 664
[1] 665
[1] 666
[1] 667
[1] 668
[1] 669
[1] 670
[1] 671
[1] 672
[1] 673
[1] 674
[1] 675
[1] 676
[1] 677
[1] 678
[1] 679
[1] 680
[1] 681
[1] 682
[1] 683
[1] 684
[1] 685
[1] 686
[1] 687
[1] 688
[1] 689
[1] 690
[1] 691
[1] 692
[1] 693
[1] 694
[1] 695
[1] 696
[1] 697
[1] 698
[1] 699
[1] 700
[1] 701
[1] 702
[1] 703
[1] 704
[1] 705
[1] 706
[1] 707
[1] 708
[1] 709
[1] 710
[1] 711
[1] 712
[1] 713
[1] 714
[1] 715
[1] 716
[1] 717
[1] 718
[1] 719
[1] 720
[1] 721
[1] 722
[1] 723
[1] 724
[1] 725
[1] 726
[1] 727
[1] 728
[1] 729
[1] 730
[1] 731
[1] 732
[1] 733
[1] 734
[1] 735
[1] 736
[1] 737
[1] 738
[1] 739
[1] 740
[1] 741
[1] 742
[1] 743
[1] 744
[1] 745
[1] 746
[1] 747
[1] 748
[1] 749
[1] 750
[1] 751
[1] 752
[1] 753
[1] 754
[1] 755
[1] 756
[1] 757
[1] 758
[1] 759
[1] 760
[1] 761
[1] 762
[1] 763
[1] 764
[1] 765
[1] 766
[1] 767
[1] 768
[1] 769
[1] 770
[1] 771
[1] 772
[1] 773
[1] 774
[1] 775
[1] 776
[1] 777
[1] 778
[1] 779
[1] 780
[1] 781
[1] 782
[1] 783
[1] 784
[1] 785
[1] 786
[1] 787
[1] 788
[1] 789
[1] 790
[1] 791
[1] 792
[1] 793
[1] 794
[1] 795
[1] 796
[1] 797
[1] 798
[1] 799
[1] 800
[1] 801
[1] 802
[1] 803
[1] 804
[1] 805
[1] 806
[1] 807
[1] 808
[1] 809
[1] 810
[1] 811
[1] 812
[1] 813
[1] 814
[1] 815
[1] 816
[1] 817
[1] 818
[1] 819
[1] 820
[1] 821
[1] 822
[1] 823
[1] 824
[1] 825
[1] 826
[1] 827
[1] 828
[1] 829
[1] 830
[1] 831
[1] 832
[1] 833
[1] 834
[1] 835
[1] 836
[1] 837
[1] 838
[1] 839
[1] 840
[1] 841
[1] 842
[1] 843
[1] 844
[1] 845
[1] 846
[1] 847
[1] 848
[1] 849
[1] 850
[1] 851
[1] 852
[1] 853
[1] 854
[1] 855
[1] 856
[1] 857
[1] 858
[1] 859
[1] 860
[1] 861
[1] 862
[1] 863
[1] 864
[1] 865
[1] 866
[1] 867
[1] 868
[1] 869
[1] 870
[1] 871
[1] 872
[1] 873
[1] 874
[1] 875
[1] 876
[1] 877
[1] 878
[1] 879
[1] 880
[1] 881
[1] 882
[1] 883
[1] 884
[1] 885
[1] 886
[1] 887
[1] 888
[1] 889
[1] 890
[1] 891
[1] 892
[1] 893
[1] 894
[1] 895
[1] 896
[1] 897
[1] 898
[1] 899
[1] 900
[1] 901
[1] 902
[1] 903
[1] 904
[1] 905
[1] 906
[1] 907
[1] 908
[1] 909
[1] 910
[1] 911
[1] 912
[1] 913
[1] 914
[1] 915
[1] 916
[1] 917
[1] 918
[1] 919
[1] 920
[1] 921
[1] 922
[1] 923
[1] 924
[1] 925
[1] 926
[1] 927
[1] 928
[1] 929
[1] 930
[1] 931
[1] 932
[1] 933
[1] 934
[1] 935
[1] 936
[1] 937
[1] 938
[1] 939
[1] 940
[1] 941
[1] 942
[1] 943
[1] 944
[1] 945
[1] 946
[1] 947
[1] 948
[1] 949
[1] 950
[1] 951
[1] 952
[1] 953
[1] 954
[1] 955
[1] 956
[1] 957
[1] 958
[1] 959
[1] 960
[1] 961
[1] 962
[1] 963
[1] 964
[1] 965
[1] 966
[1] 967
[1] 968
[1] 969
[1] 970
[1] 971
[1] 972
[1] 973
[1] 974
[1] 975
[1] 976
[1] 977
[1] 978
[1] 979
[1] 980
[1] 981
[1] 982
[1] 983
[1] 984
[1] 985
[1] 986
[1] 987
[1] 988
[1] 989
[1] 990
[1] 991
[1] 992
[1] 993
[1] 994
[1] 995
[1] 996
[1] 997
[1] 998
[1] 999
[1] 1000