Dataset Viewer
Auto-converted to Parquet Duplicate
id
stringlengths
4
123
downloads
int64
0
3.03M
downloadsAllTime
int64
0
143M
likes
int64
0
9.75k
tags
listlengths
1
7.92k
organization
stringlengths
2
42
has_audio
bool
2 classes
has_speech
bool
2 classes
has_music
bool
2 classes
has_robot
bool
2 classes
has_bio
bool
2 classes
has_med
bool
2 classes
has_series
bool
2 classes
has_video
bool
2 classes
has_image
bool
2 classes
has_text
bool
2 classes
has_science
bool
2 classes
is_biomed
bool
2 classes
data_download_timestamp
timestamp[us, tz=UTC]date
2026-07-01 06:19:47
2026-07-01 06:19:47
Glint-Research/Fable-5-traces
41,461
41,461
478
[ "task_categories:text-generation", "annotations_creators:machine-generated", "language:en", "license:agpl-3.0", "size_categories:1K<n<10K", "format:json", "format:agent-traces", "modality:tabular", "modality:text", "library:datasets", "library:dask", "library:polars", "library:mlcroissant", ...
Glint-Research
false
false
false
false
false
false
false
false
false
true
false
false
2026-07-01T06:19:47.381000
armand0e/claude-fable-5-claude-code
12,969
12,969
247
[ "task_categories:text-generation", "size_categories:n<1K", "format:json", "format:agent-traces", "modality:tabular", "modality:text", "library:datasets", "library:dask", "library:polars", "library:mlcroissant", "region:us", "agent-traces", "format:agent-traces", "claude", "distillation",...
armand0e
false
false
false
false
false
false
false
false
false
true
false
false
2026-07-01T06:19:47.381000
Qwen/AgentWorldBench
1,329
1,329
56
[ "task_categories:text-generation", "language:en", "license:apache-2.0", "size_categories:1K<n<10K", "format:json", "modality:tabular", "modality:text", "library:datasets", "library:dask", "library:polars", "library:mlcroissant", "arxiv:2606.24597", "region:us", "world-model", "agent", ...
Qwen
false
false
false
false
false
false
false
false
false
true
false
false
2026-07-01T06:19:47.381000
mlabonne/open-perfectblend
2,152
13,705
118
[ "license:apache-2.0", "size_categories:1M<n<10M", "format:parquet", "modality:text", "library:datasets", "library:dask", "library:mlcroissant", "library:polars", "arxiv:2409.20370", "region:us" ]
mlabonne
false
false
false
false
false
false
false
false
false
true
false
false
2026-07-01T06:19:47.381000
LocalLaws/LOCUS-v1
1,963
2,225
80
[ "task_categories:text-classification", "language:en", "license:cc-by-nc-4.0", "size_categories:1M<n<10M", "format:parquet", "format:optimized-parquet", "modality:tabular", "modality:text", "library:datasets", "library:dask", "library:polars", "library:mlcroissant", "arxiv:2606.19334", "reg...
LocalLaws
false
false
false
false
false
false
false
false
false
true
false
false
2026-07-01T06:19:47.381000
scholarweave/arxiv-latex
9,733
9,733
34
[ "task_categories:text-generation", "task_categories:feature-extraction", "language:en", "license:other", "size_categories:1M<n<10M", "region:us", "science", "arxiv", "latex", "academic" ]
scholarweave
false
false
false
false
false
false
false
false
false
true
true
false
2026-07-01T06:19:47.381000
BitRobot/HIW-500
56,627
56,627
35
[ "language:en", "license:cc-by-4.0", "region:us", "robotics", "humanoid" ]
BitRobot
false
false
false
true
false
false
false
false
false
false
false
false
2026-07-01T06:19:47.381000
bcbl190626/SpanishBCBL
1,516
1,516
30
[ "task_categories:other", "language:es", "license:cc-by-nc-4.0", "arxiv:2502.07429", "region:us", "neuroscience", "meg", "eeg", "brain-computer-interface", "bci", "brain-to-text", "typing", "motor", "electrophysiology" ]
bcbl190626
false
false
false
false
false
false
false
false
false
true
true
false
2026-07-01T06:19:47.381000
Crownelius/Complete-FABLE.5-traces-2M
2,892
2,892
40
[ "task_categories:text-generation", "task_ids:language-modeling", "annotations_creators:machine-generated", "language_creators:found", "language_creators:machine-generated", "multilinguality:monolingual", "language:en", "license:mit", "size_categories:1M<n<10M", "format:parquet", "modality:tabula...
Crownelius
false
false
false
false
false
false
false
false
false
true
false
false
2026-07-01T06:19:47.381000
ArtificialAnalysis/ITBench-AA
44,093
44,096
45
[ "task_categories:question-answering", "language:en", "license:cc-by-4.0", "size_categories:n<1K", "format:json", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "region:us", "sre", "kubernetes", "root-cause-analysis", "agents", "it-operat...
ArtificialAnalysis
false
false
false
false
false
false
false
false
false
true
false
false
2026-07-01T06:19:47.381000
Rapidata/svg-benchmark
546
546
22
[ "task_categories:text-to-image", "task_categories:image-classification", "task_categories:reinforcement-learning", "language:en", "license:cc-by-4.0", "size_categories:100K<n<1M", "format:parquet", "format:optimized-parquet", "modality:image", "modality:text", "library:datasets", "library:dask...
Rapidata
false
false
false
false
false
false
false
false
true
true
false
false
2026-07-01T06:19:47.381000
allenai/olmOCR-bench
7,899
52,554
253
[ "benchmark:official", "benchmark:eval-yaml", "language:en", "license:odc-by", "size_categories:1K<n<10K", "modality:document", "modality:text", "arxiv:2502.18443", "region:us", "text" ]
allenai
false
false
false
false
false
false
false
false
false
true
false
false
2026-07-01T06:19:47.381000
WithinUsAI/claude_mythos_distilled_25k
3,538
4,007
133
[ "language:en", "license:apache-2.0", "size_categories:10K<n<100K", "format:json", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "region:us", "synthetic", "claude", "mythos", "distillation", "cybersecurity", "coding", "reasoning", "a...
WithinUsAI
false
false
false
false
false
false
false
false
false
true
false
false
2026-07-01T06:19:47.381000
XDOF/ABC-130k
433,064
433,064
66
[ "task_categories:robotics", "language:en", "license:apache-2.0", "size_categories:n>1T", "region:us", "robotics", "manipulation", "imitation-learning", "bimanual", "teleoperation", "mcap" ]
XDOF
false
false
false
true
false
false
false
false
false
false
false
false
2026-07-01T06:19:47.381000
CodeDevX/Vibe-Coding-Instruct
2,385
2,385
173
[ "task_categories:text-generation", "language:en", "license:apache-2.0", "size_categories:1M<n<10M", "format:json", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "region:us", "custom", "vibecodinginstruct" ]
CodeDevX
false
false
false
false
false
false
false
false
false
true
false
false
2026-07-01T06:19:47.381000
lordx64/agentic-distill-fable-5-sft
1,329
1,329
49
[ "task_categories:text-generation", "language:en", "license:agpl-3.0", "size_categories:1K<n<10K", "format:parquet", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "region:us", "agentic", "chain-of-thought", "distillation", "claude", "cla...
lordx64
false
false
false
false
false
false
false
false
false
true
false
false
2026-07-01T06:19:47.381000
angrygiraffe/claude-opus-4.6-4.7-reasoning-8.7k
8,927
16,757
424
[ "task_categories:text-generation", "task_categories:question-answering", "language:en", "license:apache-2.0", "size_categories:10K<n<100K", "format:json", "modality:text", "library:datasets", "library:dask", "library:mlcroissant", "region:us", "sft", "chain-of-thought", "coding", "math",...
angrygiraffe
false
false
false
false
false
false
false
false
false
true
true
false
2026-07-01T06:19:47.381000
BitRobot/HIW-500-LeRobot
20,280
20,280
16
[ "task_categories:robotics", "language:en", "license:cc-by-4.0", "size_categories:10M<n<100M", "format:parquet", "modality:tabular", "modality:text", "modality:timeseries", "modality:video", "library:datasets", "library:dask", "library:polars", "library:mlcroissant", "library:lerobot", "r...
BitRobot
false
false
false
true
false
false
true
true
false
true
false
false
2026-07-01T06:19:47.381000
Aignostics/OpenTME
15,602
16,382
31
[ "task_categories:image-classification", "task_categories:image-segmentation", "task_categories:image-feature-extraction", "task_categories:object-detection", "license:other", "size_categories:10K<n<100K", "format:imagefolder", "modality:image", "library:datasets", "library:mlcroissant", "arxiv:2...
Aignostics
false
false
false
false
true
false
false
false
true
false
false
true
2026-07-01T06:19:47.381000
nvidia/Open-SWE-Traces
2,868
2,886
33
[ "license:cc-by-4.0", "size_categories:100K<n<1M", "format:parquet", "modality:text", "library:datasets", "library:dask", "library:polars", "library:mlcroissant", "arxiv:2606.16038", "region:us", "code", "synthetic", "tools", "agents", "software" ]
nvidia
false
false
false
false
false
false
false
false
false
true
false
false
2026-07-01T06:19:47.381000
PawanKrd/claude-fable-5-code
693
693
25
[ "task_categories:text-generation", "language:en", "size_categories:n<1K", "format:json", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "region:us", "code", "claude", "fable-5" ]
PawanKrd
false
false
false
false
false
false
false
false
false
true
false
false
2026-07-01T06:19:47.381000
AletheiaResearch/GLM-5.2-Agent
1,091
1,091
22
[ "task_categories:text-generation", "size_categories:n<1K", "format:json", "format:agent-traces", "modality:tabular", "modality:text", "library:datasets", "library:dask", "library:polars", "library:mlcroissant", "region:eu", "agent-traces", "format:agent-traces", "pi", "distillation", "...
AletheiaResearch
false
false
false
false
false
false
false
false
false
true
false
false
2026-07-01T06:19:47.381000
open-thoughts/OpenThoughts-Agent-SFT-100K
522
522
13
[ "language:en", "license:apache-2.0", "size_categories:10K<n<100K", "format:parquet", "format:optimized-parquet", "modality:text", "library:datasets", "library:dask", "library:polars", "library:mlcroissant", "region:us", "agents", "terminal", "code", "software-engineering", "sft" ]
open-thoughts
false
false
false
false
false
false
false
false
false
true
false
false
2026-07-01T06:19:47.381000
HuggingFaceFW/fineweb
257,884
8,601,261
2,908
[ "task_categories:text-generation", "language:en", "license:odc-by", "size_categories:10B<n<100B", "modality:tabular", "modality:text", "arxiv:2306.01116", "arxiv:2109.07445", "arxiv:2406.17557", "doi:10.57967/hf/2493", "region:us" ]
HuggingFaceFW
false
false
false
false
false
false
false
false
false
true
false
false
2026-07-01T06:19:47.381000
futo-org/swipe.futo.org
865
3,323
27
[ "task_categories:other", "language:en", "license:mit", "size_categories:1M<n<10M", "format:json", "modality:tabular", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "arxiv:2606.25247", "region:us" ]
futo-org
false
false
false
false
false
false
false
false
false
true
false
false
2026-07-01T06:19:47.381000
Meddies/meddies-persona-vie
1,016
1,424
14
[ "task_categories:other", "annotations_creators:machine-generated", "language_creators:machine-generated", "multilinguality:monolingual", "source_datasets:HoangHa/meddies-persona", "language:vi", "license:cc-by-nc-4.0", "size_categories:100K<n<1M", "format:parquet", "modality:text", "library:data...
Meddies
false
false
false
false
false
true
false
false
false
true
false
true
2026-07-01T06:19:47.381000
ginigen-ai/Metacognition-Bench
48
48
17
[ "task_categories:text-generation", "task_categories:question-answering", "language:en", "license:apache-2.0", "size_categories:n<1K", "format:json", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "region:us", "metacognition", "self-correctio...
ginigen-ai
false
false
false
false
false
false
false
false
false
true
false
false
2026-07-01T06:19:47.381000
badlogicgames/pi-mono
2,406
25,244
173
[ "task_categories:text-generation", "language:en", "language:code", "license:other", "region:us", "agent-traces", "coding-agent", "pi-share-hf" ]
badlogicgames
false
false
false
false
false
false
false
false
false
true
false
false
2026-07-01T06:19:47.381000
nvidia/Nemotron-Personas-Korea
13,156
105,403
513
[ "task_categories:text-generation", "language:ko", "license:cc-by-4.0", "size_categories:1M<n<10M", "format:parquet", "format:optimized-parquet", "modality:image", "modality:text", "library:datasets", "library:dask", "library:polars", "library:mlcroissant", "library:datadesigner", "region:u...
nvidia
false
false
false
false
false
false
false
false
true
true
false
false
2026-07-01T06:19:47.381000
openai/gsm8k
924,348
12,950,870
1,410
[ "benchmark:official", "benchmark:eval-yaml", "task_categories:text-generation", "annotations_creators:crowdsourced", "language_creators:crowdsourced", "multilinguality:monolingual", "source_datasets:original", "language:en", "license:mit", "size_categories:10K<n<100K", "format:parquet", "modal...
openai
false
false
false
false
false
false
false
false
false
true
false
false
2026-07-01T06:19:47.381000
WithinUsAI/GPT_5.5_Distilled
976
1,168
26
[ "license:apache-2.0", "size_categories:10K<n<100K", "format:json", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "region:us" ]
WithinUsAI
false
false
false
false
false
false
false
false
false
true
false
false
2026-07-01T06:19:47.381000
makora-ai/triton-gpu-latency
173
173
13
[ "task_categories:text-generation", "language:en", "license:apache-2.0", "size_categories:100K<n<1M", "format:parquet", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "region:us", "gpu", "triton", "cuda", "kernel-generation", "code", "p...
makora-ai
false
false
false
false
false
false
false
false
false
true
false
false
2026-07-01T06:19:47.381000
nvidia/PhysicalAI-Autonomous-Vehicles
207,219
2,549,022
927
[ "license:other", "region:us" ]
nvidia
false
false
false
false
false
false
false
false
false
false
false
false
2026-07-01T06:19:47.381000
xlangai/osworld_v2_tasks
775
775
9
[ "license:apache-2.0", "size_categories:n<1K", "format:json", "modality:tabular", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "region:us" ]
xlangai
false
false
false
false
false
false
false
false
false
true
false
false
2026-07-01T06:19:47.381000
STBack23/omnivoice-vi
673
673
10
[ "language:vi", "license:apache-2.0", "size_categories:n<1K", "format:text", "modality:audio", "modality:text", "library:datasets", "library:mlcroissant", "region:us", "text-to-speech", "tts", "voice-cloning", "vietnamese", "srt", "dubbing", "omnivoice" ]
STBack23
true
true
false
false
false
false
false
false
false
true
false
false
2026-07-01T06:19:47.381000
ajibawa-2023/Shell-Code-Large
316
316
19
[ "task_categories:text-generation", "language:en", "license:mit", "size_categories:100K<n<1M", "format:json", "modality:text", "library:datasets", "library:dask", "library:polars", "library:mlcroissant", "region:us", "Shell", "Code", "LLM", "Training" ]
ajibawa-2023
false
false
false
false
false
false
false
false
false
true
false
false
2026-07-01T06:19:47.381000
macrodata/WGO-Bench
578
578
7
[ "task_categories:robotics", "task_categories:video-classification", "language:en", "license:cc-by-nc-sa-4.0", "size_categories:n<1K", "format:parquet", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "region:us", "temporal-segmentation", "sub...
macrodata
false
false
false
true
false
false
false
true
false
true
false
false
2026-07-01T06:19:47.381000
roneneldan/TinyStories
79,696
1,505,033
1,044
[ "task_categories:text-generation", "language:en", "license:cdla-sharing-1.0", "size_categories:1M<n<10M", "format:parquet", "modality:text", "library:datasets", "library:dask", "library:polars", "library:mlcroissant", "arxiv:2305.07759", "region:us" ]
roneneldan
false
false
false
false
false
false
false
false
false
true
false
false
2026-07-01T06:19:47.381000
HuggingFaceFW/fineweb-edu
394,139
7,780,450
1,167
[ "task_categories:text-generation", "language:en", "license:odc-by", "size_categories:1B<n<10B", "format:parquet", "modality:tabular", "modality:text", "library:datasets", "library:dask", "library:polars", "library:mlcroissant", "arxiv:2406.17557", "arxiv:2404.14219", "arxiv:2401.10020", ...
HuggingFaceFW
false
false
false
false
false
false
false
false
false
true
false
false
2026-07-01T06:19:47.381000
cais/hle
27,692
358,860
847
[ "benchmark:official", "license:mit", "size_categories:1K<n<10K", "format:parquet", "modality:image", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "region:us" ]
cais
false
false
false
false
false
false
false
false
true
true
false
false
2026-07-01T06:19:47.381000
Anthropic/EconomicIndex
22,063
161,684
552
[ "language:en", "license:mit", "arxiv:2503.04761", "region:us", "AI", "LLM", "Economic Impacts", "Anthropic" ]
Anthropic
false
false
false
false
false
false
false
false
false
true
false
false
2026-07-01T06:19:47.381000
SakanaAI/AI-CUDA-Engineer-Archive
3,642
31,558
221
[ "license:cc-by-4.0", "size_categories:10K<n<100K", "format:parquet", "modality:tabular", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "region:us", "code" ]
SakanaAI
false
false
false
false
false
false
false
false
false
true
false
false
2026-07-01T06:19:47.381000
ibm-research/AssetOpsBench
900
9,968
43
[ "task_categories:question-answering", "task_categories:time-series-forecasting", "language:en", "license:apache-2.0", "size_categories:n<1K", "format:json", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "arxiv:2506.03828", "region:us", "Ind...
ibm-research
false
false
false
false
false
false
true
false
false
true
false
false
2026-07-01T06:19:47.381000
ScaleAI/SWE-bench_Pro
68,070
1,113,947
143
[ "benchmark:official", "benchmark:eval-yaml", "size_categories:n<1K", "format:parquet", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "region:us" ]
ScaleAI
false
false
false
false
false
false
false
false
false
true
false
false
2026-07-01T06:19:47.381000
VINAY-UMRETHE/Sonnet-Opus-4.5-4.6-Gemini-3.0-3.1-Pro-GPT-5-5.1-5.2-GLM-4.7-MiniMax-M2.1-DeepSeek-V3.2-High
211
435
11
[ "task_categories:text-generation", "language:en", "license:apache-2.0", "size_categories:100K<n<1M", "format:parquet", "format:optimized-parquet", "modality:text", "library:datasets", "library:dask", "library:polars", "library:mlcroissant", "region:us", "distillation", "agent", "code", ...
VINAY-UMRETHE
false
false
false
false
false
false
false
false
false
true
false
false
2026-07-01T06:19:47.381000
Cseti/ComfyUI-Workflows
1,601
5,210
20
[ "region:us" ]
Cseti
false
false
false
false
false
false
false
false
false
false
false
false
2026-07-01T06:19:47.381000
kelexine/fable-5-sft-traces
629
629
10
[ "task_categories:text-generation", "language:en", "license:agpl-3.0", "size_categories:1K<n<10K", "format:parquet", "format:optimized-parquet", "modality:tabular", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "region:us", "agentic", "rea...
kelexine
false
false
false
false
false
false
false
false
false
true
false
false
2026-07-01T06:19:47.381000
TuringEnterprises/Rubric-Graded-Reasoning
479
479
12
[ "task_categories:question-answering", "task_categories:text-generation", "language:en", "license:mit", "size_categories:n<1K", "format:json", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "arxiv:2009.03300", "arxiv:2311.12022", "region:us",...
TuringEnterprises
false
false
false
false
false
false
false
false
false
true
true
false
2026-07-01T06:19:47.381000
ClSu/ember-features
927
927
8
[ "license:mit", "size_categories:1K<n<10K", "format:csv", "modality:tabular", "modality:text", "library:datasets", "library:dask", "library:polars", "library:mlcroissant", "arxiv:2606.03695", "region:us" ]
ClSu
false
false
false
false
false
false
false
false
false
true
false
false
2026-07-01T06:19:47.381000
FrontisAI/NatureBench
19,980
19,980
6
[ "language:en", "license:other", "size_categories:n<1K", "format:json", "modality:image", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "arxiv:2606.24530", "region:us", "coding-agents", "benchmark", "scientific-machine-learning", "nature...
FrontisAI
false
false
false
false
false
false
false
false
true
true
false
false
2026-07-01T06:19:47.381000
AstroAutomata/ThousandWorlds
319
319
6
[ "task_categories:tabular-regression", "task_categories:other", "license:cc-by-4.0", "size_categories:1K<n<10K", "library:datasets", "arxiv:2606.18338", "region:us", "benchmark", "datasets", "physical-sciences", "scientific-machine-learning", "exoplanets", "climate", "astronomy", "emulati...
AstroAutomata
false
false
false
false
false
false
false
false
false
false
true
false
2026-07-01T06:19:47.381000
KlingTeam/UnityShotsBench
1,633
1,633
6
[ "task_categories:text-to-video", "task_categories:image-to-video", "language:zh", "language:yue", "language:en", "language:de", "language:es", "language:ar", "language:hi", "language:bn", "language:sw", "language:yo", "language:fa", "language:pt", "language:vi", "license:cc-by-nc-4.0",...
KlingTeam
true
false
false
false
false
false
false
true
true
true
false
false
2026-07-01T06:19:47.381000
prathoshap/vagdhenu-data
1,037
1,037
6
[ "task_categories:text-to-speech", "language:sa", "license:cc-by-4.0", "size_categories:1K<n<10K", "format:audiofolder", "modality:audio", "modality:text", "library:datasets", "library:mlcroissant", "region:us" ]
prathoshap
true
true
false
false
false
false
false
false
false
true
false
false
2026-07-01T06:19:47.381000
cais/mmlu
430,337
42,093,551
779
[ "task_categories:question-answering", "task_ids:multiple-choice-qa", "annotations_creators:no-annotation", "language_creators:expert-generated", "multilinguality:monolingual", "source_datasets:original", "language:en", "license:mit", "size_categories:100K<n<1M", "format:parquet", "modality:text"...
cais
false
false
false
false
false
false
false
false
false
true
false
false
2026-07-01T06:19:47.381000
fka/prompts.chat
31,564
613,606
9,749
[ "task_categories:question-answering", "task_categories:text-generation", "license:cc0-1.0", "size_categories:1K<n<10K", "format:csv", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "region:us", "ChatGPT", "prompts", "AI", "GPT", "Claude"...
fka
false
false
false
false
false
false
false
false
false
true
false
false
2026-07-01T06:19:47.381000
gaia-benchmark/GAIA
24,574
317,263
706
[ "language:en", "size_categories:n<1K", "format:parquet", "modality:audio", "modality:document", "modality:image", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "arxiv:2311.12983", "region:us" ]
gaia-benchmark
true
false
false
false
false
false
false
false
true
true
false
false
2026-07-01T06:19:47.381000
agibot-world/AgiBotWorld2026
36,826
112,795
46
[ "task_categories:robotics", "language:en", "license:cc-by-nc-sa-4.0", "size_categories:1K<n<10K", "modality:image", "modality:text", "region:us", "agibot", "imitation-learning", "embodied-ai", "lerobot", "real-world", "dual-arm" ]
agibot-world
false
false
false
true
false
false
false
false
true
true
false
false
2026-07-01T06:19:47.381000
lambda/hermes-agent-reasoning-traces
2,937
15,121
370
[ "task_categories:text-generation", "language:en", "license:apache-2.0", "size_categories:10K<n<100K", "format:parquet", "format:optimized-parquet", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "region:us", "tool-calling", "function-calling...
lambda
false
false
false
false
false
false
false
false
false
true
false
false
2026-07-01T06:19:47.381000
TrueNix/ctf-solver-dataset
283
303
7
[ "size_categories:100K<n<1M", "format:json", "modality:text", "library:datasets", "library:dask", "library:polars", "library:mlcroissant", "region:us" ]
TrueNix
false
false
false
false
false
false
false
false
false
true
false
false
2026-07-01T06:19:47.381000
doctolib-lab/finemed-fr
1,768
1,768
5
[ "task_categories:fill-mask", "task_categories:text-generation", "language:fr", "license:odc-by", "license:cc-by-sa-4.0", "size_categories:10M<n<100M", "format:parquet", "modality:image", "modality:tabular", "modality:text", "library:datasets", "library:dask", "library:polars", "library:mlc...
doctolib-lab
false
false
false
false
true
true
false
false
true
true
false
true
2026-07-01T06:19:47.381000
cfahlgren1/Fable-5-traces
1,304
1,304
15
[ "license:agpl-3.0", "size_categories:n<1K", "format:json", "format:agent-traces", "modality:tabular", "modality:text", "library:datasets", "library:dask", "library:polars", "library:mlcroissant", "region:us" ]
cfahlgren1
false
false
false
false
false
false
false
false
false
true
false
false
2026-07-01T06:19:47.381000
allenai/tmax-15k-open-instruct
646
646
8
[ "language:en", "license:odc-by", "size_categories:10K<n<100K", "format:parquet", "format:optimized-parquet", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "arxiv:2606.23321", "region:us" ]
allenai
false
false
false
false
false
false
false
false
false
true
false
false
2026-07-01T06:19:47.381000
jdopensource/JoyAI-VL-Interaction
822
822
12
[ "task_categories:video-text-to-text", "license:apache-2.0", "arxiv:2606.14777", "region:us" ]
jdopensource
false
false
false
false
false
false
false
true
false
true
false
false
2026-07-01T06:19:47.381000
hotdogs/uka-fable-reasoning
259
259
9
[ "language:en", "license:agpl-3.0", "size_categories:10K<n<100K", "modality:text", "region:us", "reasoning", "agentic", "sft", "chain-of-thought", "multi-turn", "tool-use", "chatml" ]
hotdogs
false
false
false
false
false
false
false
false
false
true
false
false
2026-07-01T06:19:47.381000
nvidia/Cosmos3-DROID
12,876
12,876
5
[ "license:openmdw-1.1", "size_categories:1K<n<10K", "modality:video", "library:datasets", "library:mlcroissant", "arxiv:2403.12945", "region:us" ]
nvidia
false
false
false
false
false
false
false
true
false
false
false
false
2026-07-01T06:19:47.381000
Voxel51/SceneFun3D
237
237
5
[ "task_categories:object-detection", "annotations_creators:expert-generated", "annotations_creators:machine-generated", "language:en", "license:cc-by-nc-sa-4.0", "size_categories:n<1K", "modality:video", "modality:3d", "library:fiftyone", "region:us", "fiftyone", "3d", "point-cloud", "fo3d"...
Voxel51
false
false
false
true
false
false
false
true
false
false
false
false
2026-07-01T06:19:47.381000
wikimedia/wikipedia
179,231
2,462,133
1,257
[ "task_categories:text-generation", "task_categories:fill-mask", "task_ids:language-modeling", "task_ids:masked-language-modeling", "language:ab", "language:ace", "language:ady", "language:af", "language:alt", "language:am", "language:ami", "language:an", "language:ang", "language:anp", "...
wikimedia
false
false
false
false
false
false
false
false
false
true
false
false
2026-07-01T06:19:47.381000
OpenAssistant/oasst1
15,460
411,571
1,539
[ "language:en", "language:es", "language:ru", "language:de", "language:pl", "language:th", "language:vi", "language:sv", "language:bn", "language:da", "language:he", "language:it", "language:fa", "language:sk", "language:id", "language:nb", "language:el", "language:nl", "language:...
OpenAssistant
false
false
false
false
false
false
false
false
false
true
false
false
2026-07-01T06:19:47.381000
lmsys/lmsys-chat-1m
6,715
332,390
926
[ "size_categories:1M<n<10M", "format:parquet", "modality:text", "library:datasets", "library:dask", "library:polars", "library:mlcroissant", "arxiv:2309.11998", "region:us" ]
lmsys
false
false
false
false
false
false
false
false
false
true
false
false
2026-07-01T06:19:47.381000
jxu124/OpenX-Embodiment
17,422
326,626
114
[ "task_categories:robotics", "task_categories:reinforcement-learning", "language:en", "license:cc-by-4.0", "size_categories:1M<n<10M", "region:us", "Robotics" ]
jxu124
false
false
false
true
false
false
false
false
false
false
false
false
2026-07-01T06:19:47.381000
Idavidrein/gpqa
94,544
1,859,618
471
[ "benchmark:official", "benchmark:eval-yaml", "task_categories:question-answering", "task_categories:text-generation", "language:en", "license:cc-by-4.0", "size_categories:1K<n<10K", "format:csv", "modality:tabular", "modality:text", "library:datasets", "library:pandas", "library:polars", "...
Idavidrein
false
false
false
false
false
false
false
false
false
true
false
false
2026-07-01T06:19:47.381000
MathLLMs/MathVision
11,679
290,949
150
[ "task_categories:question-answering", "task_categories:multiple-choice", "task_categories:visual-question-answering", "task_categories:text-generation", "task_categories:image-to-text", "task_categories:image-text-to-text", "annotations_creators:expert-generated", "annotations_creators:found", "lang...
MathLLMs
false
false
false
false
false
false
false
false
true
true
true
false
2026-07-01T06:19:47.381000
mlabonne/harmful_behaviors
16,539
111,060
135
[ "language:en", "size_categories:n<1K", "format:parquet", "modality:text", "library:datasets", "library:pandas", "library:mlcroissant", "library:polars", "region:us" ]
mlabonne
false
false
false
false
false
false
false
false
false
true
false
false
2026-07-01T06:19:47.381000
galileo-ai/ragbench
6,365
94,915
119
[ "license:cc-by-4.0", "size_categories:10K<n<100K", "format:parquet", "modality:tabular", "modality:text", "library:datasets", "library:pandas", "library:mlcroissant", "library:polars", "region:us" ]
galileo-ai
false
false
false
false
false
false
false
false
false
true
false
false
2026-07-01T06:19:47.381000
ai4bharat/IndicVoices
11,332
130,785
71
[ "license:cc-by-4.0", "size_categories:1M<n<10M", "format:parquet", "format:optimized-parquet", "modality:audio", "modality:text", "library:datasets", "library:dask", "library:polars", "library:mlcroissant", "arxiv:2403.01926", "region:us" ]
ai4bharat
true
false
false
false
false
false
false
false
false
true
false
false
2026-07-01T06:19:47.381000
nvidia/Granary
3,933
76,948
204
[ "task_categories:automatic-speech-recognition", "task_categories:translation", "language:bg", "language:cs", "language:da", "language:de", "language:el", "language:en", "language:es", "language:et", "language:fi", "language:fr", "language:hr", "language:hu", "language:it", "language:lt...
nvidia
false
true
false
false
false
false
false
false
false
true
false
false
2026-07-01T06:19:47.381000
thedeoxen/refcontrol-flux-kontext-dataset
55
5,086
11
[ "license:apache-2.0", "size_categories:1K<n<10K", "format:imagefolder", "modality:image", "library:datasets", "library:mlcroissant", "region:us" ]
thedeoxen
false
false
false
false
false
false
false
false
true
false
false
false
2026-07-01T06:19:47.381000
Forithmus/MR-RATE
87,549
321,225
86
[ "task_categories:image-to-text", "task_categories:text-to-image", "task_categories:image-classification", "task_categories:question-answering", "task_categories:visual-question-answering", "task_categories:zero-shot-classification", "language:en", "license:cc-by-nc-sa-4.0", "size_categories:10K<n<10...
Forithmus
false
false
false
false
false
true
false
false
true
true
true
true
2026-07-01T06:19:47.381000
nvidia/Nemotron-SFT-Agentic-v2
17,196
34,798
38
[ "task_categories:text-generation", "language:en", "license:cc-by-4.0", "license:apache-2.0", "license:mit", "region:us", "tool-use" ]
nvidia
false
false
false
false
false
false
false
false
false
true
false
false
2026-07-01T06:19:47.381000
llamaindex/ParseBench
15,916
87,283
101
[ "benchmark:official", "benchmark:eval-yaml", "language:en", "license:apache-2.0", "size_categories:100K<n<1M", "format:json", "modality:document", "modality:image", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "arxiv:2604.08538", "region...
llamaindex
false
false
false
false
false
false
false
false
true
true
false
false
2026-07-01T06:19:47.381000
it4lia/PHANTOM
314
315
4
[ "task_categories:image-text-to-text", "task_categories:visual-question-answering", "task_categories:image-to-text", "annotations_creators:machine-generated", "language_creators:machine-generated", "language:en", "license:cc-by-4.0", "size_categories:10K<n<100K", "format:imagefolder", "modality:ima...
it4lia
false
false
false
false
false
false
false
false
true
true
false
false
2026-07-01T06:19:47.381000
agents-last-exam/agents-last-exam
8,227
8,271
195
[ "language:en", "license:cc-by-4.0", "size_categories:n<1K", "format:parquet", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "region:us", "computer-use-agents", "agent-benchmark", "benchmark", "evaluation" ]
agents-last-exam
false
false
false
false
false
false
false
false
false
true
false
false
2026-07-01T06:19:47.381000
openbmb/UltraData-SFT-2605
40,414
52,456
355
[ "task_categories:text-generation", "task_categories:question-answering", "language:en", "language:zh", "license:apache-2.0", "size_categories:10M<n<100M", "modality:text", "arxiv:2602.09003", "region:us", "llm", "sft", "supervised-fine-tuning", "post-training", "deep-thinking", "reasonin...
openbmb
false
false
false
false
false
false
false
false
false
true
false
false
2026-07-01T06:19:47.381000
HelioAI/Fable-5-Distill-Reasoning-462x
1,304
1,304
34
[ "task_categories:text-generation", "annotations_creators:machine-generated", "language:en", "language:ru", "license:unknown", "size_categories:n<1K", "region:us", "reasoning", "long-context", "reasoning-traces", "synthetic-data", "chain-of-thought", "process-supervision", "mythos-v2", "d...
HelioAI
false
false
false
false
true
true
false
false
true
true
false
true
2026-07-01T06:19:47.381000
nvidia/Nemotron-3.5-Content-Safety-Dataset
656
656
9
[ "size_categories:10K<n<100K", "format:parquet", "format:optimized-parquet", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "region:us" ]
nvidia
false
false
false
false
false
false
false
false
false
true
false
false
2026-07-01T06:19:47.381000
11-47/claude_opus_4.8_distill_5k
732
732
16
[ "license:apache-2.0", "size_categories:1K<n<10K", "format:json", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "region:us" ]
11-47
false
false
false
false
false
false
false
false
false
true
false
false
2026-07-01T06:19:47.381000
sbintuitions/joyo-kanji-yomi-benchmark
20
20
4
[ "task_categories:text-to-speech", "language:ja", "license:mit", "size_categories:10K<n<100K", "format:json", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "arxiv:2606.25369", "region:us", "tts-evaluation", "japanese", "kanji", "chinese-...
sbintuitions
false
true
false
false
false
false
false
false
false
true
false
false
2026-07-01T06:19:47.381000
agents-last-exam/agents-last-exam-data-archive
157
157
8
[ "language:en", "license:cc-by-4.0", "region:us", "computer-use-agents", "agent-benchmark", "benchmark", "evaluation" ]
agents-last-exam
false
false
false
false
false
false
false
false
false
false
false
false
2026-07-01T06:19:47.381000
build-small-hackathon/CVE_Vulnerailities_Detailed
317
317
11
[ "size_categories:10K<n<100K", "format:parquet", "format:optimized-parquet", "modality:text", "library:datasets", "library:dask", "library:polars", "library:mlcroissant", "region:us" ]
build-small-hackathon
false
false
false
false
false
false
false
false
false
true
false
false
2026-07-01T06:19:47.381000
BAAI-Agents/SWITCH
1,720
1,720
4
[ "task_categories:visual-question-answering", "task_categories:text-generation", "task_categories:video-to-video", "task_categories:image-to-text", "language:en", "license:cc-by-nc-4.0", "size_categories:1K<n<10K", "format:json", "modality:image", "modality:text", "modality:video", "library:dat...
BAAI-Agents
false
false
false
false
false
false
false
true
true
true
false
false
2026-07-01T06:19:47.381000
Omarrran/Koshur_Pixel
370
370
5
[ "task_categories:image-to-text", "task_categories:text-to-image", "language:ks", "license:cc-by-nd-4.0", "size_categories:1M<n<10M", "modality:image", "modality:text", "arxiv:2606.23144", "region:us", "ocr", "synthetic", "kashmiri", "nastaliq", "nakash" ]
Omarrran
false
false
false
false
false
false
false
false
true
true
false
false
2026-07-01T06:19:47.381000
Voxel51/KITScenes-LongTail
1,322
1,322
4
[ "task_categories:video-classification", "annotations_creators:expert-generated", "language:en", "language:es", "language:zh", "size_categories:n<1K", "modality:3d", "modality:video", "library:datasets", "library:mlcroissant", "library:fiftyone", "arxiv:2603.23607", "region:us", "fiftyone",...
Voxel51
false
false
false
false
false
false
false
true
false
false
false
false
2026-07-01T06:19:47.381000
RicemanT/Anime-Background-Finetuning-V1.1
5,235
5,235
4
[ "language:en", "license:apache-2.0", "size_categories:10K<n<100K", "modality:image", "modality:text", "region:us" ]
RicemanT
false
false
false
false
false
false
false
false
true
true
false
false
2026-07-01T06:19:47.381000
yigitekin/BeyondMasks
343
343
4
[ "license:cc-by-4.0", "region:us" ]
yigitekin
false
false
false
false
false
false
false
false
false
false
false
false
2026-07-01T06:19:47.381000
karpathy/tiny_shakespeare
6,517
243,939
85
[ "region:us" ]
karpathy
false
false
false
false
false
false
false
false
false
false
false
false
2026-07-01T06:19:47.381000
Salesforce/wikitext
1,324,722
33,544,486
727
[ "task_categories:text-generation", "task_categories:fill-mask", "task_ids:language-modeling", "task_ids:masked-language-modeling", "annotations_creators:no-annotation", "language_creators:crowdsourced", "multilinguality:monolingual", "source_datasets:original", "language:en", "license:cc-by-sa-3.0...
Salesforce
false
false
false
false
false
false
false
false
false
true
false
false
2026-07-01T06:19:47.381000
ILSVRC/imagenet-1k
106,927
2,073,688
844
[ "task_categories:image-classification", "task_ids:multi-class-image-classification", "annotations_creators:crowdsourced", "language_creators:crowdsourced", "multilinguality:monolingual", "source_datasets:original", "language:en", "license:other", "size_categories:1M<n<10M", "format:parquet", "fo...
ILSVRC
false
false
false
false
false
false
false
false
true
false
false
false
2026-07-01T06:19:47.381000
tatsu-lab/alpaca
75,572
2,171,480
999
[ "task_categories:text-generation", "language:en", "license:cc-by-nc-4.0", "size_categories:10K<n<100K", "format:parquet", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "region:us", "instruction-finetuning" ]
tatsu-lab
false
false
false
false
false
false
false
false
false
true
false
false
2026-07-01T06:19:47.381000
yahma/alpaca-cleaned
25,351
1,015,333
847
[ "task_categories:text-generation", "language:en", "license:cc-by-4.0", "size_categories:10K<n<100K", "format:json", "modality:text", "library:datasets", "library:pandas", "library:mlcroissant", "library:polars", "region:us", "instruction-finetuning" ]
yahma
false
false
false
false
false
false
false
false
false
true
false
false
2026-07-01T06:19:47.381000
nampdn-ai/tiny-codes
1,220
28,886
292
[ "task_categories:text-generation", "language:en", "license:mit", "size_categories:1M<n<10M", "format:parquet", "modality:text", "library:datasets", "library:dask", "library:mlcroissant", "library:polars", "arxiv:2306.11644", "arxiv:2305.07759", "doi:10.57967/hf/0937", "region:us" ]
nampdn-ai
false
false
false
false
false
false
false
false
false
true
false
false
2026-07-01T06:19:47.381000
End of preview. Expand in Data Studio

No dataset card yet

Downloads last month
436

Space using evijit/dataverse_daily_data 1