Tupaki.com has a robots.txt file. The HTTP response status is 200.
The robots.txt file is small: about 161 kb and 53 lines.

The robots.txt file for tupaki.com lists 3 sitemap files: https://www.tupaki.com/news-sitemap-daily.xml, https://www.tupaki.com/sitemap-daily.xml, https://www.tupaki.com/sitemap-daily-updated.xml. No Host directive is present for the domain tupaki.com in the robots.txt file.
Array
(
    [*] => Array
        (
            [allow] => Array
                (
                    [0] => /
                    [1] => /*.css$
                    [2] => /*.js$
                    [3] => /images/
                )

            [disallow] => Array
                (
                    [0] => /admin/
                    [1] => /xhr/
                    [2] => /staging/
                    [3] => /search/
                    [4] => /search?*
                    [5] => /*?s=
                    [6] => /*?q=
                    [7] => /*?query=
                    [8] => /*?keyword=
                    [9] => /preview/story-
                    [10] => /amp/preview/story-
                    [11] => /embed/
                    [12] => /*?page=
                    [13] => /*?sort=
                    [14] => /*?order=
                    [15] => /*?filter=
                    [16] => /alfoo
                    [17] => /sildoo
                    [18] => /dutas
                    [19] => /metsmall
                    [20] => /*.woff$
                    [21] => /*.woff2$
                    [22] => /*.ttf$
                    [23] => /*.otf$
                )

            [sitemap] => Array
                (
                    [0] => https://www.tupaki.com/news-sitemap-daily.xml
                    [1] => https://www.tupaki.com/sitemap-daily.xml
                    [2] => https://www.tupaki.com/sitemap-daily-updated.xml
                )

        )

    [googlebot-news] => Array
        (
            [allow] => Array
                (
                    [0] => /
                )

        )

)
User-agent: *
Allow: /

# Block admin and system directories
Disallow: /admin/
Disallow: /xhr/
Disallow: /staging/

# Block search pages and query parameters that cause crawl traps
Disallow: /search/
Disallow: /search?*
Disallow: /*?s=
Disallow: /*?q=
Disallow: /*?query=
Disallow: /*?keyword=

# Block preview and embed URLs (non-content)
Disallow: /preview/story-
Disallow: /amp/preview/story-
Disallow: /embed/

# Block common pagination, sorting, filtering parameters to avoid crawl bloat
Disallow: /*?page=
Disallow: /*?sort=
Disallow: /*?order=
Disallow: /*?filter=

# Block known low-value internal paths
Disallow: /alfoo
Disallow: /sildoo
Disallow: /dutas
Disallow: /metsmall

# Block font files to save crawl budget (no SEO impact)
Disallow: /*.woff$
Disallow: /*.woff2$
Disallow: /*.ttf$
Disallow: /*.otf$


# Allow CSS, JS, and images needed for rendering and Discover
Allow: /*.css$
Allow: /*.js$
Allow: /images/

# Explicitly allow Googlebot-News full access
User-agent: Googlebot-News
Allow: /

# Sitemaps to help Google discover your content efficiently
Sitemap: https://www.tupaki.com/news-sitemap-daily.xml
Sitemap: https://www.tupaki.com/sitemap-daily.xml
Sitemap:  https://www.tupaki.com/sitemap-daily-updated.xml
  • code 301 http://tupaki.com/robots.txt
  • code 301 https://tupaki.com:443/robots.txt
  • code 200 https://www.tupaki.com:443/robots.txt