Dataset Viewer
Auto-converted to Parquet Duplicate
id
stringlengths
4
123
downloads
int64
0
2.68M
downloadsAllTime
int64
0
143M
likes
int64
0
9.68k
tags
listlengths
1
7.92k
organization
stringlengths
2
42
has_audio
bool
2 classes
has_speech
bool
2 classes
has_music
bool
2 classes
has_robot
bool
2 classes
has_bio
bool
2 classes
has_med
bool
2 classes
has_series
bool
2 classes
has_video
bool
2 classes
has_image
bool
2 classes
has_text
bool
2 classes
has_science
bool
2 classes
is_biomed
bool
2 classes
data_download_timestamp
timestamp[us, tz=UTC]date
2026-05-02 04:54:33
2026-05-02 04:54:33
nvidia/Nemotron-Personas-Korea
51,701
51,701
371
[ "task_categories:text-generation", "language:ko", "license:cc-by-4.0", "size_categories:1M<n<10M", "format:parquet", "format:optimized-parquet", "modality:image", "modality:text", "library:datasets", "library:dask", "library:polars", "library:mlcroissant", "library:datadesigner", "region:u...
nvidia
false
false
false
false
false
false
false
false
true
true
false
false
2026-05-02T04:54:33.430000
Jackrong/GLM-5.1-Reasoning-1M-Cleaned
4,398
4,398
145
[ "task_categories:text-generation", "task_categories:question-answering", "language:en", "language:zh", "license:apache-2.0", "size_categories:100K<n<1M", "format:json", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "region:us", "reasoning",...
Jackrong
false
false
false
false
false
false
false
false
false
true
false
false
2026-05-02T04:54:33.430000
Roman1111111/claude-opus-4.6-10000x
7,648
9,970
320
[ "license:mit", "size_categories:1K<n<10K", "format:json", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "region:us" ]
Roman1111111
false
false
false
false
false
false
false
false
false
true
false
false
2026-05-02T04:54:33.430000
lambda/hermes-agent-reasoning-traces
8,681
8,686
268
[ "task_categories:text-generation", "language:en", "license:apache-2.0", "size_categories:10K<n<100K", "format:parquet", "format:optimized-parquet", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "region:us", "tool-calling", "function-calling...
lambda
false
false
false
false
false
false
false
false
false
true
false
false
2026-05-02T04:54:33.430000
openai/healthbench-professional
6,699
6,699
43
[ "license:mit", "size_categories:n<1K", "format:json", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "region:us", "health", "healthbench" ]
openai
false
false
false
false
false
false
false
false
false
true
false
false
2026-05-02T04:54:33.430000
open-thoughts/AgentTrove
200
200
32
[ "task_categories:text-generation", "language:en", "license:apache-2.0", "size_categories:1M<n<10M", "format:parquet", "modality:text", "library:datasets", "library:dask", "library:polars", "library:mlcroissant", "region:us", "agent", "code", "agentic-traces", "reinforcement-learning", ...
open-thoughts
false
false
false
false
false
false
false
false
false
true
false
false
2026-05-02T04:54:33.430000
nvidia/Nemotron-Image-Training-v3
994
994
31
[ "task_categories:visual-question-answering", "task_categories:image-text-to-text", "license:cc-by-4.0", "size_categories:1M<n<10M", "format:json", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "region:us" ]
nvidia
false
false
false
false
false
false
false
false
true
true
false
false
2026-05-02T04:54:33.430000
Jackrong/DeepSeek-V4-Distill-8000x
2,435
2,435
34
[ "task_categories:text-generation", "source_datasets:Jackrong/GLM-5.1-Reasoning-1M-Cleaned", "language:en", "license:mit", "size_categories:1K<n<10K", "format:json", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "region:us", "reasoning", "di...
Jackrong
false
false
false
false
false
false
false
false
false
true
false
false
2026-05-02T04:54:33.430000
ShadenA/MathNet
11,446
11,448
35
[ "task_categories:question-answering", "task_categories:text-generation", "task_categories:image-to-text", "language:en", "language:pt", "language:es", "language:fr", "language:it", "language:sr", "language:sl", "language:de", "language:zh", "language:ro", "language:ko", "language:nl", ...
ShadenA
false
false
false
false
false
false
false
false
true
true
false
false
2026-05-02T04:54:33.430000
AlicanKiraz0/Cybersecurity-Dataset-Fenrir-v2.1
6,059
10,875
62
[ "task_categories:text-generation", "language:en", "license:apache-2.0", "size_categories:10K<n<100K", "format:json", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "region:us", "cybersecurity", "defensive-security", "instruction-tuning" ]
AlicanKiraz0
false
false
false
false
false
false
false
false
false
true
false
false
2026-05-02T04:54:33.430000
SALT-NLP/SWE-chat
1,244
1,244
26
[ "task_categories:text-generation", "language:en", "license:odc-by", "size_categories:1M<n<10M", "format:parquet", "modality:tabular", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "arxiv:2604.20779", "region:us", "code", "agent", "trace...
SALT-NLP
false
false
false
false
false
false
false
false
false
true
false
false
2026-05-02T04:54:33.430000
Modotte/CodeX-2M-Thinking
3,326
11,392
52
[ "task_categories:text-generation", "task_categories:question-answering", "annotations_creators:machine-generated", "annotations_creators:expert-verified", "multilinguality:monolingual", "source_datasets:Modotte internal synthetic generation", "language:en", "license:apache-2.0", "size_categories:1M<...
Modotte
false
false
false
false
false
false
false
false
false
true
false
false
2026-05-02T04:54:33.430000
Roman1111111/claude-sonnet-4.6-120000x
4,064
4,064
59
[ "size_categories:100K<n<1M", "format:json", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "region:us" ]
Roman1111111
false
false
false
false
false
false
false
false
false
true
false
false
2026-05-02T04:54:33.430000
badlogicgames/pi-mono
20,105
20,105
103
[ "task_categories:text-generation", "language:en", "language:code", "license:other", "size_categories:n<1K", "format:json", "format:agent-traces", "modality:text", "library:datasets", "library:dask", "library:polars", "library:mlcroissant", "region:us", "agent-traces", "coding-agent", "...
badlogicgames
false
false
false
false
false
false
false
false
false
true
false
false
2026-05-02T04:54:33.430000
SWE-bench/SWE-bench_Verified
100,054
916,828
55
[ "benchmark:official", "benchmark:eval-yaml", "size_categories:n<1K", "format:parquet", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "region:us" ]
SWE-bench
false
false
false
false
false
false
false
false
false
true
false
false
2026-05-02T04:54:33.430000
beyoru/Deepseek-v4-pro-max-distill-1000x
500
500
16
[ "task_categories:text-generation", "language:en", "license:apache-2.0", "size_categories:1K<n<10K", "format:json", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "region:us", "reasoning", "distillation", "chain-of-thought", "deepseek", "...
beyoru
false
false
false
false
false
false
false
false
false
true
false
false
2026-05-02T04:54:33.430000
TAAC2026/data_sample_1000
11,723
17,170
74
[ "license:cc-by-nc-4.0", "size_categories:1K<n<10K", "format:parquet", "modality:tabular", "modality:timeseries", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "region:us", "TAAC2026", "recommendation" ]
TAAC2026
false
false
false
false
false
false
true
false
false
false
false
false
2026-05-02T04:54:33.430000
lordx64/reasoning-distill-claude-opus-4-7-max
879
879
28
[ "task_categories:text-generation", "language:en", "license:apache-2.0", "size_categories:1K<n<10K", "format:parquet", "format:optimized-parquet", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "region:us", "reasoning", "chain-of-thought", ...
lordx64
false
false
false
false
false
false
false
false
false
true
false
false
2026-05-02T04:54:33.430000
HuggingFaceFW/fineweb-edu
405,938
6,743,185
1,049
[ "task_categories:text-generation", "language:en", "license:odc-by", "size_categories:1B<n<10B", "format:parquet", "modality:tabular", "modality:text", "library:datasets", "library:dask", "library:polars", "library:mlcroissant", "arxiv:2406.17557", "arxiv:2404.14219", "arxiv:2401.10020", ...
HuggingFaceFW
false
false
false
false
false
false
false
false
false
true
false
false
2026-05-02T04:54:33.430000
nvidia/Nemotron-Personas-USA
11,915
121,537
298
[ "task_categories:text-generation", "language:en", "license:cc-by-4.0", "size_categories:1M<n<10M", "format:parquet", "modality:text", "library:datasets", "library:dask", "library:mlcroissant", "library:polars", "library:datadesigner", "region:us", "synthetic", "personas", "NVIDIA", "da...
nvidia
false
false
false
false
false
false
false
false
false
true
false
false
2026-05-02T04:54:33.430000
genrobot2025/10Kh-RealOmin-OpenData
200,660
561,679
211
[ "task_categories:robotics", "task_categories:reinforcement-learning", "language:en", "language:zh", "license:cc-by-sa-4.0", "size_categories:n>1T", "modality:video", "region:us", "agent", "robotic", "real-world", "dual-arm", "video", "vla", "embodied intelligence" ]
genrobot2025
false
false
false
true
false
false
false
true
false
false
false
false
2026-05-02T04:54:33.430000
lordx64/reasoning-distill-opus-4-7-max-sft
759
759
23
[ "task_categories:text-generation", "language:en", "license:apache-2.0", "size_categories:1K<n<10K", "format:parquet", "format:optimized-parquet", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "region:us", "reasoning", "chain-of-thought", ...
lordx64
false
false
false
false
false
false
false
false
false
true
false
false
2026-05-02T04:54:33.430000
tencent/MegaStyle-1.4M
1,000
1,000
37
[ "task_categories:text-to-image", "language:en", "license:other", "size_categories:1M<n<10M", "format:parquet", "modality:image", "modality:text", "library:datasets", "library:dask", "library:polars", "library:mlcroissant", "arxiv:2604.08364", "region:us", "style transfer", "text-to-image...
tencent
false
false
false
false
false
false
false
false
true
true
false
false
2026-05-02T04:54:33.430000
openai/gsm8k
871,250
11,023,453
1,288
[ "benchmark:official", "benchmark:eval-yaml", "task_categories:text-generation", "annotations_creators:crowdsourced", "language_creators:crowdsourced", "multilinguality:monolingual", "source_datasets:original", "language:en", "license:mit", "size_categories:10K<n<100K", "format:parquet", "modal...
openai
false
false
false
false
false
false
false
false
false
true
false
false
2026-05-02T04:54:33.430000
hf-audio/open-asr-leaderboard
21,819
150,229
25
[ "benchmark:official", "benchmark:eval-yaml", "size_categories:100K<n<1M", "format:parquet", "modality:audio", "modality:text", "library:datasets", "library:dask", "library:polars", "library:mlcroissant", "arxiv:2510.06961", "region:us" ]
hf-audio
true
false
false
false
false
false
false
false
false
true
false
false
2026-05-02T04:54:33.430000
nvidia/Nemotron-SFT-Math-v3
1,886
3,298
28
[ "task_categories:text-generation", "language:en", "license:cc-by-4.0", "license:cc-by-sa-4.0", "arxiv:2512.15489", "region:us", "math" ]
nvidia
false
false
false
false
false
false
false
false
false
true
false
false
2026-05-02T04:54:33.430000
ScaleAI/SWE-bench_Pro
37,647
988,691
104
[ "benchmark:official", "benchmark:eval-yaml", "size_categories:n<1K", "format:parquet", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "region:us" ]
ScaleAI
false
false
false
false
false
false
false
false
false
true
false
false
2026-05-02T04:54:33.430000
open-thoughts/TaskTrove
60
60
10
[ "task_categories:text-generation", "language:en", "license:apache-2.0", "size_categories:100K<n<1M", "format:parquet", "modality:text", "library:datasets", "library:dask", "library:polars", "library:mlcroissant", "region:us", "agent", "code", "agentic-tasks", "harbor", "reinforcement-l...
open-thoughts
false
false
false
false
false
false
false
false
false
true
false
false
2026-05-02T04:54:33.430000
ai4privacy/pii-masking-300k
6,034
52,411
94
[ "task_categories:text-classification", "task_categories:token-classification", "task_categories:table-question-answering", "task_categories:question-answering", "task_categories:zero-shot-classification", "task_categories:summarization", "task_categories:feature-extraction", "task_categories:text-gene...
ai4privacy
false
false
false
false
false
false
false
false
false
true
false
false
2026-05-02T04:54:33.430000
ZhihaoNan/AtomBlock-WebUI
2,638
2,638
44
[ "task_categories:object-detection", "language:en", "license:cc-by-nc-sa-4.0", "size_categories:1K<n<10K", "format:parquet", "format:optimized-parquet", "modality:image", "modality:text", "library:datasets", "library:dask", "library:polars", "library:mlcroissant", "region:us", "agent", "u...
ZhihaoNan
false
false
false
false
false
false
false
false
true
true
false
false
2026-05-02T04:54:33.430000
NuTonic/sat-image-boundingbox-sft-full
1,928
1,928
12
[ "task_categories:image-text-to-text", "language:en", "license:apache-2.0", "size_categories:100K<n<1M", "format:json", "modality:image", "modality:text", "modality:geospatial", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "region:us", "satellite", "land-...
NuTonic
false
false
false
false
false
false
false
false
true
true
false
false
2026-05-02T04:54:33.430000
ILSVRC/imagenet-1k
114,943
1,884,361
787
[ "task_categories:image-classification", "task_ids:multi-class-image-classification", "annotations_creators:crowdsourced", "language_creators:crowdsourced", "multilinguality:monolingual", "source_datasets:original", "language:en", "license:other", "size_categories:1M<n<10M", "format:parquet", "fo...
ILSVRC
false
false
false
false
false
false
false
false
true
false
false
false
2026-05-02T04:54:33.430000
teknium/OpenHermes-2.5
22,852
216,051
826
[ "language:eng", "size_categories:1M<n<10M", "format:json", "modality:text", "library:datasets", "library:pandas", "library:mlcroissant", "library:polars", "region:us", "synthetic", "GPT-4", "Distillation", "Compilation" ]
teknium
false
false
false
false
false
false
false
false
false
true
false
false
2026-05-02T04:54:33.430000
google/smol
2,791
33,044
106
[ "task_categories:translation", "language:aa", "language:ab", "language:abq", "language:ace", "language:ach", "language:ady", "language:aeb", "language:af", "language:ahr", "language:aii", "language:ak", "language:alz", "language:am", "language:apc", "language:apd", "language:ar", "...
google
false
false
false
false
false
false
false
false
false
true
false
false
2026-05-02T04:54:33.430000
Kassadin88/Claude-Distills
671
671
25
[ "task_categories:text-generation", "task_categories:question-answering", "language:en", "license:mit", "size_categories:100K<n<1M", "region:us", "claude", "distillation", "reasoning", "instruction-tuning", "sft" ]
Kassadin88
false
false
false
false
false
false
false
false
false
true
false
false
2026-05-02T04:54:33.430000
HuggingFaceFW/fineweb
677,412
7,251,328
2,775
[ "task_categories:text-generation", "language:en", "license:odc-by", "size_categories:10B<n<100B", "modality:tabular", "modality:text", "arxiv:2306.01116", "arxiv:2109.07445", "arxiv:2406.17557", "doi:10.57967/hf/2493", "region:us" ]
HuggingFaceFW
false
false
false
false
false
false
false
false
false
true
false
false
2026-05-02T04:54:33.430000
ianncity/KIMI-K2.5-1000000x
5,884
6,175
252
[ "task_categories:text-generation", "task_categories:question-answering", "language:en", "license:apache-2.0", "size_categories:100K<n<1M", "format:json", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "region:us", "reasoning", "chain-of-thou...
ianncity
false
false
false
false
false
false
false
false
false
true
false
false
2026-05-02T04:54:33.430000
ADSKAILab/Zero-To-CAD-1m
303
303
7
[ "task_categories:text-to-3d", "task_categories:image-to-3d", "language:en", "language:code", "license:apache-2.0", "size_categories:100K<n<1M", "format:parquet", "modality:tabular", "modality:text", "library:datasets", "library:dask", "library:polars", "library:mlcroissant", "arxiv:2604.24...
ADSKAILab
false
false
false
false
false
false
false
false
true
true
false
false
2026-05-02T04:54:33.430000
TeichAI/lordx64-claude-opus-4.7-max-cleaned
648
648
7
[ "license:apache-2.0", "size_categories:1K<n<10K", "format:json", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "region:us", "opus-4.7", "distillation", "reasoning" ]
TeichAI
false
false
false
false
false
false
false
false
false
true
false
false
2026-05-02T04:54:33.430000
PleIAs/CommonLingua-Train
92
92
7
[ "task_categories:text-classification", "language:multilingual", "license:other", "size_categories:1M<n<10M", "format:parquet", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "region:us", "language-identification", "common-corpus", "african-l...
PleIAs
false
false
false
false
false
false
false
false
false
true
false
false
2026-05-02T04:54:33.430000
cais/hle
52,806
288,146
791
[ "benchmark:official", "license:mit", "size_categories:1K<n<10K", "format:parquet", "modality:image", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "region:us" ]
cais
false
false
false
false
false
false
false
false
true
true
false
false
2026-05-02T04:54:33.430000
nvidia/OpenCodeInstruct
6,737
36,284
78
[ "task_categories:text-generation", "language:en", "license:cc-by-4.0", "size_categories:1M<n<10M", "format:parquet", "modality:text", "library:datasets", "library:dask", "library:polars", "library:mlcroissant", "arxiv:2504.04030", "region:us", "code", "synthetic" ]
nvidia
false
false
false
false
false
false
false
false
false
true
false
false
2026-05-02T04:54:33.430000
AlicanKiraz0/All-CVE-Records-Training-Dataset
2,140
5,683
56
[ "task_categories:text-generation", "language:en", "license:apache-2.0", "size_categories:100K<n<1M", "format:json", "modality:text", "library:datasets", "library:pandas", "library:mlcroissant", "library:polars", "region:us", "cybersecurity", "cve", "vulnerability" ]
AlicanKiraz0
false
false
false
false
false
false
false
false
false
true
false
false
2026-05-02T04:54:33.430000
SII-WANGZJ/Polymarket_data
44,500
54,391
52
[ "size_categories:1B<n<10B", "modality:tabular", "modality:text", "region:us" ]
SII-WANGZJ
false
false
false
false
false
false
false
false
false
true
false
false
2026-05-02T04:54:33.430000
Crownelius/Opus-4.6-Reasoning-3300x
4,047
7,374
293
[ "license:apache-2.0", "size_categories:1K<n<10K", "format:parquet", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "region:us" ]
Crownelius
false
false
false
false
false
false
false
false
false
true
false
false
2026-05-02T04:54:33.430000
nvidia/Nemotron-SFT-Agentic-v2
1,802
2,271
14
[ "task_categories:text-generation", "language:en", "license:cc-by-4.0", "license:apache-2.0", "license:mit", "region:us", "tool-use" ]
nvidia
false
false
false
false
false
false
false
false
false
true
false
false
2026-05-02T04:54:33.430000
Delores-Lin/MDPBench
24,353
24,384
18
[ "benchmark:official", "benchmark:eval-yaml", "task_categories:image-to-text", "language:zh", "language:en", "language:ar", "language:de", "language:es", "language:fr", "language:hi", "language:id", "language:it", "language:nl", "language:ja", "language:ko", "language:pt", "language:r...
Delores-Lin
false
false
false
false
false
false
false
false
true
true
false
false
2026-05-02T04:54:33.430000
WithinUsAI/GPT5.5_thinking_max_distill_god_seed_25K
308
308
7
[ "task_categories:text-generation", "language:en", "license:apache-2.0", "size_categories:10K<n<100K", "format:json", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "region:us", "gpt-5-5", "thinking-max-distill", "god-level-recursive-seed-ai"...
WithinUsAI
false
false
false
false
false
false
false
false
false
true
false
false
2026-05-02T04:54:33.430000
ning423/Hermes-OmniForge-Qwen36-27B-full-v0.3.0-unsloth
91
91
6
[ "task_categories:text-generation", "task_categories:visual-question-answering", "task_categories:image-text-to-text", "language:en", "license:other", "size_categories:100K<n<1M", "format:json", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "r...
ning423
false
false
false
false
false
false
false
false
true
true
false
false
2026-05-02T04:54:33.430000
3dlg-hcvc/ReVSI
657
666
6
[ "task_categories:visual-question-answering", "language:en", "license:apache-2.0", "size_categories:10K<n<100K", "format:parquet", "modality:text", "modality:video", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "arxiv:2604.24300", "region:us", "Spatial Inte...
3dlg-hcvc
false
false
false
false
false
false
false
true
true
true
false
false
2026-05-02T04:54:33.430000
hzxie/DOM
6,931
6,946
6
[ "task_categories:robotics", "license:other", "size_categories:100K<n<1M", "arxiv:2601.22153", "region:us", "lerobot", "franka", "dynamic", "visual-language-action", "vla" ]
hzxie
false
false
false
true
false
false
false
false
false
false
false
false
2026-05-02T04:54:33.430000
microsoft/World-R1
135
135
6
[ "source_datasets:original", "language:en", "license:mit", "size_categories:1K<n<10K", "format:json", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "arxiv:2604.24764", "region:us", "text", "datasets", "text-to-video", "video-generation",...
microsoft
false
false
false
false
false
false
false
true
false
true
false
false
2026-05-02T04:54:33.430000
ning423/nemotron-nano-hermes-traces
21
21
6
[ "task_categories:text-generation", "language:en", "license:apache-2.0", "size_categories:10K<n<100K", "format:parquet", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "region:us", "reasoning", "agent", "tool-calling", "hermes", "sft", ...
ning423
false
false
false
false
false
false
false
false
false
true
false
false
2026-05-02T04:54:33.430000
angrygiraffe/claude-opus-4.6-4.7-reasoning-8.7k
0
0
7
[ "task_categories:text-generation", "task_categories:question-answering", "language:en", "license:apache-2.0", "size_categories:1K<n<10K", "modality:text", "region:us", "sft", "chain-of-thought", "coding", "math", "roleplay", "science", "humanities", "art", "multi-turn", "text", "js...
angrygiraffe
false
false
false
false
false
false
false
false
false
true
true
false
2026-05-02T04:54:33.430000
Salesforce/wikitext
1,289,476
30,805,283
677
[ "task_categories:text-generation", "task_categories:fill-mask", "task_ids:language-modeling", "task_ids:masked-language-modeling", "annotations_creators:no-annotation", "language_creators:crowdsourced", "multilinguality:monolingual", "source_datasets:original", "language:en", "license:cc-by-sa-3.0...
Salesforce
false
false
false
false
false
false
false
false
false
true
false
false
2026-05-02T04:54:33.430000
wikimedia/wikipedia
152,492
2,034,437
1,204
[ "task_categories:text-generation", "task_categories:fill-mask", "task_ids:language-modeling", "task_ids:masked-language-modeling", "language:ab", "language:ace", "language:ady", "language:af", "language:alt", "language:am", "language:ami", "language:an", "language:ang", "language:anp", "...
wikimedia
false
false
false
false
false
false
false
false
false
true
false
false
2026-05-02T04:54:33.430000
google/fleurs
54,509
1,471,721
398
[ "task_categories:automatic-speech-recognition", "annotations_creators:expert-generated", "annotations_creators:crowdsourced", "annotations_creators:machine-generated", "language_creators:crowdsourced", "language_creators:expert-generated", "multilinguality:multilingual", "language:afr", "language:am...
google
false
true
false
false
false
false
false
false
false
false
false
false
2026-05-02T04:54:33.430000
Idavidrein/gpqa
110,376
1,621,246
424
[ "benchmark:official", "benchmark:eval-yaml", "task_categories:question-answering", "task_categories:text-generation", "language:en", "license:cc-by-4.0", "size_categories:1K<n<10K", "format:csv", "modality:tabular", "modality:text", "library:datasets", "library:pandas", "library:polars", "...
Idavidrein
false
false
false
false
false
false
false
false
false
true
false
false
2026-05-02T04:54:33.430000
nvidia/HelpSteer2
4,065
469,587
451
[ "language:en", "license:cc-by-4.0", "size_categories:10K<n<100K", "format:json", "modality:tabular", "modality:text", "library:datasets", "library:pandas", "library:mlcroissant", "library:polars", "arxiv:2410.01257", "arxiv:2406.08673", "region:us", "human-feedback" ]
nvidia
false
false
false
false
false
false
false
false
false
true
false
false
2026-05-02T04:54:33.430000
Anthropic/EconomicIndex
21,394
89,349
514
[ "language:en", "license:mit", "arxiv:2503.04761", "region:us", "AI", "LLM", "Economic Impacts", "Anthropic" ]
Anthropic
false
false
false
false
false
false
false
false
false
true
false
false
2026-05-02T04:54:33.430000
nvidia/PhysicalAI-SmartSpaces
10,519
675,272
72
[ "license:cc-by-4.0", "arxiv:2412.00692", "region:us" ]
nvidia
false
false
false
false
false
false
false
false
false
false
false
false
2026-05-02T04:54:33.430000
allenai/olmOCR-bench
3,547
39,106
201
[ "benchmark:official", "benchmark:eval-yaml", "language:en", "license:odc-by", "size_categories:1K<n<10K", "modality:document", "modality:text", "arxiv:2502.18443", "region:us", "text" ]
allenai
false
false
false
false
false
false
false
false
false
true
false
false
2026-05-02T04:54:33.430000
nvidia/PhysicalAI-Autonomous-Vehicles
250,056
2,180,760
857
[ "license:other", "region:us" ]
nvidia
false
false
false
false
false
false
false
false
false
false
false
false
2026-05-02T04:54:33.430000
nvidia/Nemotron-PII
3,969
16,309
94
[ "task_categories:token-classification", "language:en", "license:cc-by-4.0", "size_categories:100K<n<1M", "format:parquet", "modality:text", "library:datasets", "library:pandas", "library:mlcroissant", "library:polars", "library:datadesigner", "region:us", "datadesigner", "pii", "privacy"...
nvidia
false
false
false
false
false
false
false
false
false
true
false
false
2026-05-02T04:54:33.430000
nvidia/Nemotron-Pretraining-Code-v2
83,617
129,194
122
[ "task_categories:text-generation", "license:other", "size_categories:100M<n<1B", "format:parquet", "modality:text", "library:datasets", "library:dask", "library:polars", "library:mlcroissant", "arxiv:2508.14444", "arxiv:2508.15096", "arxiv:2412.02595", "arxiv:2505.02881", "region:us" ]
nvidia
false
false
false
false
false
false
false
false
false
true
false
false
2026-05-02T04:54:33.430000
shaurya03/tech-news-daily
3,446
11,675
11
[ "size_categories:10K<n<100K", "format:csv", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "region:us" ]
shaurya03
false
false
false
false
false
false
false
false
false
true
false
false
2026-05-02T04:54:33.430000
harborframework/terminal-bench-2-leaderboard
13,348
113,897
26
[ "license:apache-2.0", "region:us" ]
harborframework
false
false
false
false
false
false
false
false
false
false
false
false
2026-05-02T04:54:33.430000
bones-studio/seed
3,468
10,206
115
[ "task_categories:robotics", "task_categories:text-to-video", "task_categories:video-text-to-text", "language:en", "license:other", "size_categories:100K<n<1M", "region:us", "motion-capture", "humanoid-robotics", "human-motion", "physical-ai", "whole-body-control", "NVIDIA-SOMA", "Unitree-G...
bones-studio
false
false
false
true
false
false
false
true
false
true
false
false
2026-05-02T04:54:33.430000
TeichAI/Claude-Opus-4.6-Reasoning-887x
8,020
9,803
80
[ "license:apache-2.0", "size_categories:n<1K", "format:parquet", "format:optimized-parquet", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "region:us" ]
TeichAI
false
false
false
false
false
false
false
false
false
true
false
false
2026-05-02T04:54:33.430000
bingbangboom/philosophia-QA
540
540
5
[ "task_categories:question-answering", "language:en", "license:cc-by-nc-sa-4.0", "size_categories:10K<n<100K", "format:json", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "region:us", "philosophy", "theology", "politics", "metaphysics", ...
bingbangboom
false
false
false
false
false
false
false
false
false
true
false
false
2026-05-02T04:54:33.430000
lightonai/veracier-industries
193
193
5
[ "task_categories:question-answering", "task_categories:document-question-answering", "task_categories:text-retrieval", "task_categories:table-question-answering", "multilinguality:multilingual", "language:fr", "language:en", "language:de", "language:it", "language:es", "license:apache-2.0", "s...
lightonai
false
false
false
false
false
false
false
false
false
true
false
false
2026-05-02T04:54:33.430000
Anthropic/BioMysteryBench-preview
153
153
5
[ "region:us" ]
Anthropic
false
false
false
false
false
false
false
false
false
false
false
false
2026-05-02T04:54:33.430000
Anthropic/BioMysteryBench-full
82
82
6
[ "region:us" ]
Anthropic
false
false
false
false
false
false
false
false
false
false
false
false
2026-05-02T04:54:33.430000
allenai/c4
766,725
12,202,833
560
[ "task_categories:text-generation", "task_categories:fill-mask", "task_ids:language-modeling", "task_ids:masked-language-modeling", "annotations_creators:no-annotation", "language_creators:found", "multilinguality:multilingual", "source_datasets:original", "language:af", "language:am", "language:...
allenai
false
false
false
false
false
false
false
false
false
true
false
false
2026-05-02T04:54:33.430000
qwedsacf/competition_math
12,717
73,866
126
[ "annotations_creators:expert-generated", "language_creators:expert-generated", "multilinguality:monolingual", "source_datasets:original", "language:en", "license:mit", "size_categories:10K<n<100K", "format:parquet", "modality:text", "library:datasets", "library:pandas", "library:polars", "li...
qwedsacf
false
false
false
false
false
false
false
false
false
true
false
false
2026-05-02T04:54:33.430000
gsdf/EasyNegative
32,189
784,979
1,178
[ "license:other", "size_categories:n<1K", "format:imagefolder", "modality:image", "library:datasets", "library:mlcroissant", "region:us" ]
gsdf
false
false
false
false
false
false
false
false
true
false
false
false
2026-05-02T04:54:33.430000
derek-thomas/ScienceQA
14,825
217,236
225
[ "task_categories:multiple-choice", "task_categories:question-answering", "task_categories:other", "task_categories:visual-question-answering", "task_categories:text-classification", "task_ids:multiple-choice-qa", "task_ids:closed-domain-qa", "task_ids:open-domain-qa", "task_ids:visual-question-answe...
derek-thomas
false
false
false
false
true
false
false
false
true
true
true
true
2026-05-02T04:54:33.430000
roneneldan/TinyStories
96,552
1,331,040
968
[ "task_categories:text-generation", "language:en", "license:cdla-sharing-1.0", "size_categories:1M<n<10M", "format:parquet", "modality:text", "library:datasets", "library:dask", "library:polars", "library:mlcroissant", "arxiv:2305.07759", "region:us" ]
roneneldan
false
false
false
false
false
false
false
false
false
true
false
false
2026-05-02T04:54:33.430000
Open-Orca/OpenOrca
44,931
551,611
1,528
[ "task_categories:text-classification", "task_categories:token-classification", "task_categories:table-question-answering", "task_categories:question-answering", "task_categories:zero-shot-classification", "task_categories:summarization", "task_categories:feature-extraction", "task_categories:text-gene...
Open-Orca
false
false
false
false
false
false
false
false
false
true
false
false
2026-05-02T04:54:33.430000
allenai/dolma
4,058
381,651
1,025
[ "task_categories:text-generation", "language:en", "license:odc-by", "size_categories:n>1T", "arxiv:2402.00159", "arxiv:2301.13688", "region:us", "language-modeling", "casual-lm", "llm" ]
allenai
false
false
false
false
false
false
false
false
false
true
false
false
2026-05-02T04:54:33.430000
HuggingFaceH4/ultrachat_200k
54,334
895,018
696
[ "task_categories:text-generation", "language:en", "license:mit", "size_categories:100K<n<1M", "format:parquet", "modality:text", "library:datasets", "library:dask", "library:mlcroissant", "library:polars", "arxiv:2305.14233", "region:us" ]
HuggingFaceH4
false
false
false
false
false
false
false
false
false
true
false
false
2026-05-02T04:54:33.430000
HuggingFaceTB/cosmopedia
20,016
510,850
688
[ "language:en", "license:apache-2.0", "size_categories:10M<n<100M", "format:parquet", "modality:text", "library:datasets", "library:dask", "library:mlcroissant", "library:polars", "arxiv:2309.05463", "arxiv:2306.11644", "region:us", "synthetic" ]
HuggingFaceTB
false
false
false
false
false
false
false
false
false
true
false
false
2026-05-02T04:54:33.430000
mlabonne/harmful_behaviors
14,636
79,676
122
[ "language:en", "size_categories:n<1K", "format:parquet", "modality:text", "library:datasets", "library:pandas", "library:mlcroissant", "library:polars", "region:us" ]
mlabonne
false
false
false
false
false
false
false
false
false
true
false
false
2026-05-02T04:54:33.430000
nomic-ai/cornstack-python-v1
3,372
28,239
24
[ "license:apache-2.0", "size_categories:10M<n<100M", "format:json", "modality:text", "library:datasets", "library:dask", "library:mlcroissant", "arxiv:2412.01007", "region:us" ]
nomic-ai
false
false
false
false
false
false
false
false
false
true
false
false
2026-05-02T04:54:33.430000
zhang0jhon/Aesthetic-4K
3,222
91,845
45
[ "license:mit", "size_categories:1K<n<10K", "format:imagefolder", "modality:image", "modality:text", "library:datasets", "library:mlcroissant", "arxiv:2503.18352", "arxiv:2506.01331", "doi:10.57967/hf/5209", "region:us" ]
zhang0jhon
false
false
false
false
false
false
false
false
true
true
false
false
2026-05-02T04:54:33.430000
Anthropic/AnthropicInterviewer
1,416
16,032
371
[ "language:en", "license:mit", "size_categories:1K<n<10K", "format:csv", "modality:text", "library:datasets", "library:pandas", "library:mlcroissant", "library:polars", "region:us" ]
Anthropic
false
false
false
false
false
false
false
false
false
true
false
false
2026-05-02T04:54:33.430000
UII-AI/MedVidBench
421
668
10
[ "task_categories:video-classification", "task_categories:visual-question-answering", "task_categories:video-text-to-text", "language:en", "license:cc-by-nc-sa-4.0", "size_categories:1K<n<10K", "format:json", "modality:image", "modality:text", "library:datasets", "library:pandas", "library:pola...
UII-AI
false
false
false
false
false
true
false
true
true
true
false
true
2026-05-02T04:54:33.430000
anrilombard/mzansi-text
250
303
7
[ "language:af", "language:en", "language:nso", "language:sot", "language:ssw", "language:tsn", "language:tso", "language:ven", "language:xho", "language:zul", "language:nbl", "license:apache-2.0", "size_categories:1M<n<10M", "format:parquet", "format:optimized-parquet", "modality:text",...
anrilombard
false
false
false
false
false
false
false
false
false
true
false
false
2026-05-02T04:54:33.430000
LequeuISIR/GDN-CC
24
83
4
[ "task_categories:text-classification", "task_categories:text-generation", "annotations_creators:expert-generated", "source_datasets:Grand Débat National", "language:fr", "license:mit", "size_categories:1K<n<10K", "format:json", "modality:text", "library:datasets", "library:pandas", "library:po...
LequeuISIR
false
false
false
false
false
false
false
false
false
true
false
false
2026-05-02T04:54:33.430000
harborframework/terminal-bench-2.0
5,095
8,774
30
[ "benchmark:official", "benchmark:eval-yaml", "task_categories:text-generation", "language:en", "license:apache-2.0", "size_categories:n<1K", "region:us", "benchmark", "agents", "terminal", "code", "evaluation", "harbor" ]
harborframework
false
false
false
false
false
false
false
false
false
true
false
false
2026-05-02T04:54:33.430000
nvidia/Nemotron-SFT-OpenCode-v1
1,839
2,558
39
[ "task_categories:text-generation", "language:en", "license:cc-by-4.0", "size_categories:100K<n<1M", "region:us", "opencode" ]
nvidia
false
false
false
false
false
false
false
false
false
true
false
false
2026-05-02T04:54:33.430000
microsoft/OpenMementos
1,263
1,272
57
[ "task_categories:text-generation", "language:en", "license:mit", "size_categories:100K<n<1M", "format:parquet", "format:optimized-parquet", "modality:text", "library:datasets", "library:dask", "library:polars", "library:mlcroissant", "region:us", "reasoning", "chain-of-thought", "context...
microsoft
false
false
false
false
false
false
false
false
false
true
false
false
2026-05-02T04:54:33.430000
julien-c/pi-sessions
356
356
5
[ "task_categories:text-generation", "language:en", "language:code", "license:cc-by-4.0", "size_categories:n<1K", "format:json", "format:agent-traces", "modality:text", "library:datasets", "library:dask", "library:polars", "library:mlcroissant", "region:us", "agent-traces", "coding-agent",...
julien-c
false
false
false
false
false
false
false
false
false
true
false
false
2026-05-02T04:54:33.430000
Jackrong/Kimi-K2.5-Reasoning-1M-Cleaned
863
863
12
[ "task_categories:text-generation", "task_categories:question-answering", "language:en", "language:zh", "license:apache-2.0", "size_categories:100K<n<1M", "format:json", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "region:us", "reasoning",...
Jackrong
false
false
false
false
false
false
false
false
false
true
false
false
2026-05-02T04:54:33.430000
Roman1111111/claude-sonnet-4.6-100000X-filtered
231
231
11
[ "license:mit", "size_categories:100K<n<1M", "format:json", "modality:text", "library:datasets", "library:dask", "library:polars", "library:mlcroissant", "region:us" ]
Roman1111111
false
false
false
false
false
false
false
false
false
true
false
false
2026-05-02T04:54:33.430000
ART-3D/H3D_v1
648
648
12
[ "task_categories:text-to-3d", "task_categories:image-to-image", "language:en", "license:cc-by-4.0", "size_categories:10K<n<100K", "region:us", "3d-editing", "part-level", "slat", "trellis", "instruction-following" ]
ART-3D
false
false
false
false
false
false
false
false
true
true
false
false
2026-05-02T04:54:33.430000
WithinUsAI/Opus4.7_thinking_max_distill_god_seed_25k
160
160
8
[ "size_categories:10K<n<100K", "format:json", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "region:us" ]
WithinUsAI
false
false
false
false
false
false
false
false
false
true
false
false
2026-05-02T04:54:33.430000
MERChallenge/MER2026
464
464
6
[ "language:en", "license:cc-by-nc-4.0", "arxiv:2604.19417", "region:us" ]
MERChallenge
false
false
false
false
false
false
false
false
false
false
false
false
2026-05-02T04:54:33.430000
junaid008/pashto-largest-corpus
68
68
4
[ "language:ps", "license:mit", "size_categories:1M<n<10M", "format:json", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "region:us" ]
junaid008
false
false
false
false
false
false
false
false
false
true
false
false
2026-05-02T04:54:33.430000
AMAImedia/NOESIS-1M-reasoning-router-code-math-psych-opus47-deepseek4-qwen36-gemini31-r1-gpt54
66
66
4
[ "task_categories:text-generation", "language:en", "language:ru", "language:zh", "language:ar", "language:hi", "language:es", "language:fr", "language:de", "language:ja", "language:ko", "language:tr", "language:vi", "language:fa", "language:it", "language:pt", "language:id", "langua...
AMAImedia
false
false
false
false
false
false
false
false
false
true
false
false
2026-05-02T04:54:33.430000
End of preview. Expand in Data Studio

No dataset card yet

Downloads last month
322

Space using evijit/dataverse_daily_data 1