Commit 6c35aff82d637e6c4f15bb55bc1490a4fe3de221

Authored by tangwang
1 parent d350861f

索引结构修改:

一、tags字段改为支持多语言:
spu表的tags字段与title采用相同的翻译逻辑,填入原始语言、zh、en。

检查以下字段,确保都与title一样走翻译逻辑:
title
keywords
tags
brief
description
vendor
category_path
category_name_text

二、/indexer/enrich-content接口的修改
1. 请求参数:去掉language。因为接口返回的内容直接对应索引结构,调用方无需再做处理,因此不需要指定语言,从而降低耦合。
2. 返回 enriched_attributes、enriched_tags、qanchors 三个字段,按原始内容填入。
3. enriched_tags是本次新增的字段,注意与tags字段区分:tags字段来源于mysql的spu表,enriched_tags则由本接口返回。

三、specifications的value需要翻译,同样需要填入中英文(如下例所示,value_keyword保留原始值,value_text填入zh、en):
{
  "specifications": [
    {
      "sku_id": "sku-red-s",
      "name": "color",
      "value_keyword": "красный",
      "value_text": {
        "zh": "红色",
        "en": "red"
      }
    }
  ]
}
mappings/generate_search_products_mapping.py
@@ -162,7 +162,6 @@ TEXT_EMBEDDING_SIZE = 1024 @@ -162,7 +162,6 @@ TEXT_EMBEDDING_SIZE = 1024
162 IMAGE_EMBEDDING_SIZE = 768 162 IMAGE_EMBEDDING_SIZE = 768
163 163
164 FIELD_SPECS = [ 164 FIELD_SPECS = [
165 - scalar_field("tenant_id", "keyword"),  
166 scalar_field("spu_id", "keyword"), 165 scalar_field("spu_id", "keyword"),
167 scalar_field("create_time", "date"), 166 scalar_field("create_time", "date"),
168 scalar_field("update_time", "date"), 167 scalar_field("update_time", "date"),
mappings/search_products.json
@@ -41,9 +41,6 @@ @@ -41,9 +41,6 @@
41 }, 41 },
42 "mappings": { 42 "mappings": {
43 "properties": { 43 "properties": {
44 - "tenant_id": {  
45 - "type": "keyword"  
46 - },  
47 "spu_id": { 44 "spu_id": {
48 "type": "keyword" 45 "type": "keyword"
49 }, 46 },
@@ -1681,20 +1678,6 @@ @@ -1681,20 +1678,6 @@
1681 } 1678 }
1682 } 1679 }
1683 }, 1680 },
1684 - "qanchors": {  
1685 - "type": "object",  
1686 - "properties": {  
1687 - "zh": {  
1688 - "type": "text",  
1689 - "analyzer": "index_ik",  
1690 - "search_analyzer": "query_ik"  
1691 - },  
1692 - "en": {  
1693 - "type": "text",  
1694 - "analyzer": "english"  
1695 - }  
1696 - }  
1697 - },  
1698 "tags": { 1681 "tags": {
1699 "type": "object", 1682 "type": "object",
1700 "properties": { 1683 "properties": {
@@ -1718,326 +1701,326 @@ @@ -1718,326 +1701,326 @@
1718 "normalizer": "lowercase" 1701 "normalizer": "lowercase"
1719 } 1702 }
1720 } 1703 }
1721 - }  
1722 - }  
1723 - },  
1724 - "category_id": {  
1725 - "type": "keyword"  
1726 - },  
1727 - "category_name": {  
1728 - "type": "keyword"  
1729 - },  
1730 - "category_level": {  
1731 - "type": "integer"  
1732 - },  
1733 - "category1_name": {  
1734 - "type": "keyword"  
1735 - },  
1736 - "category2_name": {  
1737 - "type": "keyword"  
1738 - },  
1739 - "category3_name": {  
1740 - "type": "keyword"  
1741 - },  
1742 - "specifications": {  
1743 - "type": "nested",  
1744 - "properties": {  
1745 - "sku_id": {  
1746 - "type": "keyword"  
1747 }, 1704 },
1748 - "name": {  
1749 - "type": "keyword" 1705 + "ar": {
  1706 + "type": "text",
  1707 + "analyzer": "arabic",
  1708 + "fields": {
  1709 + "keyword": {
  1710 + "type": "keyword",
  1711 + "normalizer": "lowercase"
  1712 + }
  1713 + }
1750 }, 1714 },
1751 - "value": {  
1752 - "type": "keyword" 1715 + "hy": {
  1716 + "type": "text",
  1717 + "analyzer": "armenian",
  1718 + "fields": {
  1719 + "keyword": {
  1720 + "type": "keyword",
  1721 + "normalizer": "lowercase"
  1722 + }
  1723 + }
1753 }, 1724 },
1754 - "value_text": {  
1755 - "type": "object",  
1756 - "properties": {  
1757 - "zh": {  
1758 - "type": "text",  
1759 - "analyzer": "index_ik",  
1760 - "search_analyzer": "query_ik",  
1761 - "fields": {  
1762 - "keyword": {  
1763 - "type": "keyword",  
1764 - "normalizer": "lowercase"  
1765 - }  
1766 - }  
1767 - },  
1768 - "en": {  
1769 - "type": "text",  
1770 - "analyzer": "english",  
1771 - "fields": {  
1772 - "keyword": {  
1773 - "type": "keyword",  
1774 - "normalizer": "lowercase"  
1775 - }  
1776 - }  
1777 - },  
1778 - "ar": {  
1779 - "type": "text",  
1780 - "analyzer": "arabic",  
1781 - "fields": {  
1782 - "keyword": {  
1783 - "type": "keyword",  
1784 - "normalizer": "lowercase"  
1785 - }  
1786 - }  
1787 - },  
1788 - "hy": {  
1789 - "type": "text",  
1790 - "analyzer": "armenian",  
1791 - "fields": {  
1792 - "keyword": {  
1793 - "type": "keyword",  
1794 - "normalizer": "lowercase"  
1795 - }  
1796 - }  
1797 - },  
1798 - "eu": {  
1799 - "type": "text",  
1800 - "analyzer": "basque",  
1801 - "fields": {  
1802 - "keyword": {  
1803 - "type": "keyword",  
1804 - "normalizer": "lowercase"  
1805 - }  
1806 - }  
1807 - },  
1808 - "pt_br": {  
1809 - "type": "text",  
1810 - "analyzer": "brazilian",  
1811 - "fields": {  
1812 - "keyword": {  
1813 - "type": "keyword",  
1814 - "normalizer": "lowercase"  
1815 - }  
1816 - }  
1817 - },  
1818 - "bg": {  
1819 - "type": "text",  
1820 - "analyzer": "bulgarian",  
1821 - "fields": {  
1822 - "keyword": {  
1823 - "type": "keyword",  
1824 - "normalizer": "lowercase"  
1825 - }  
1826 - }  
1827 - },  
1828 - "ca": {  
1829 - "type": "text",  
1830 - "analyzer": "catalan",  
1831 - "fields": {  
1832 - "keyword": {  
1833 - "type": "keyword",  
1834 - "normalizer": "lowercase"  
1835 - }  
1836 - }  
1837 - },  
1838 - "cjk": {  
1839 - "type": "text",  
1840 - "analyzer": "cjk",  
1841 - "fields": {  
1842 - "keyword": {  
1843 - "type": "keyword",  
1844 - "normalizer": "lowercase"  
1845 - }  
1846 - }  
1847 - },  
1848 - "cs": {  
1849 - "type": "text",  
1850 - "analyzer": "czech",  
1851 - "fields": {  
1852 - "keyword": {  
1853 - "type": "keyword",  
1854 - "normalizer": "lowercase"  
1855 - }  
1856 - }  
1857 - },  
1858 - "da": {  
1859 - "type": "text",  
1860 - "analyzer": "danish",  
1861 - "fields": {  
1862 - "keyword": {  
1863 - "type": "keyword",  
1864 - "normalizer": "lowercase"  
1865 - }  
1866 - }  
1867 - },  
1868 - "nl": {  
1869 - "type": "text",  
1870 - "analyzer": "dutch",  
1871 - "fields": {  
1872 - "keyword": {  
1873 - "type": "keyword",  
1874 - "normalizer": "lowercase"  
1875 - }  
1876 - }  
1877 - },  
1878 - "fi": {  
1879 - "type": "text",  
1880 - "analyzer": "finnish",  
1881 - "fields": {  
1882 - "keyword": {  
1883 - "type": "keyword",  
1884 - "normalizer": "lowercase"  
1885 - }  
1886 - }  
1887 - },  
1888 - "fr": {  
1889 - "type": "text",  
1890 - "analyzer": "french",  
1891 - "fields": {  
1892 - "keyword": {  
1893 - "type": "keyword",  
1894 - "normalizer": "lowercase"  
1895 - }  
1896 - }  
1897 - },  
1898 - "gl": {  
1899 - "type": "text",  
1900 - "analyzer": "galician",  
1901 - "fields": {  
1902 - "keyword": {  
1903 - "type": "keyword",  
1904 - "normalizer": "lowercase"  
1905 - }  
1906 - }  
1907 - },  
1908 - "de": {  
1909 - "type": "text",  
1910 - "analyzer": "german",  
1911 - "fields": {  
1912 - "keyword": {  
1913 - "type": "keyword",  
1914 - "normalizer": "lowercase"  
1915 - }  
1916 - }  
1917 - },  
1918 - "el": {  
1919 - "type": "text",  
1920 - "analyzer": "greek",  
1921 - "fields": {  
1922 - "keyword": {  
1923 - "type": "keyword",  
1924 - "normalizer": "lowercase"  
1925 - }  
1926 - }  
1927 - },  
1928 - "hi": {  
1929 - "type": "text",  
1930 - "analyzer": "hindi",  
1931 - "fields": {  
1932 - "keyword": {  
1933 - "type": "keyword",  
1934 - "normalizer": "lowercase"  
1935 - }  
1936 - }  
1937 - },  
1938 - "hu": {  
1939 - "type": "text",  
1940 - "analyzer": "hungarian",  
1941 - "fields": {  
1942 - "keyword": {  
1943 - "type": "keyword",  
1944 - "normalizer": "lowercase"  
1945 - }  
1946 - }  
1947 - },  
1948 - "id": {  
1949 - "type": "text",  
1950 - "analyzer": "indonesian",  
1951 - "fields": {  
1952 - "keyword": {  
1953 - "type": "keyword",  
1954 - "normalizer": "lowercase"  
1955 - }  
1956 - }  
1957 - },  
1958 - "it": {  
1959 - "type": "text",  
1960 - "analyzer": "italian",  
1961 - "fields": {  
1962 - "keyword": {  
1963 - "type": "keyword",  
1964 - "normalizer": "lowercase"  
1965 - }  
1966 - }  
1967 - },  
1968 - "no": {  
1969 - "type": "text",  
1970 - "analyzer": "norwegian",  
1971 - "fields": {  
1972 - "keyword": {  
1973 - "type": "keyword",  
1974 - "normalizer": "lowercase"  
1975 - }  
1976 - }  
1977 - },  
1978 - "fa": {  
1979 - "type": "text",  
1980 - "analyzer": "persian",  
1981 - "fields": {  
1982 - "keyword": {  
1983 - "type": "keyword",  
1984 - "normalizer": "lowercase"  
1985 - }  
1986 - }  
1987 - },  
1988 - "pt": {  
1989 - "type": "text",  
1990 - "analyzer": "portuguese",  
1991 - "fields": {  
1992 - "keyword": {  
1993 - "type": "keyword",  
1994 - "normalizer": "lowercase"  
1995 - }  
1996 - }  
1997 - },  
1998 - "ro": {  
1999 - "type": "text",  
2000 - "analyzer": "romanian",  
2001 - "fields": {  
2002 - "keyword": {  
2003 - "type": "keyword",  
2004 - "normalizer": "lowercase"  
2005 - }  
2006 - }  
2007 - },  
2008 - "ru": {  
2009 - "type": "text",  
2010 - "analyzer": "russian",  
2011 - "fields": {  
2012 - "keyword": {  
2013 - "type": "keyword",  
2014 - "normalizer": "lowercase"  
2015 - }  
2016 - }  
2017 - },  
2018 - "es": {  
2019 - "type": "text",  
2020 - "analyzer": "spanish",  
2021 - "fields": {  
2022 - "keyword": {  
2023 - "type": "keyword",  
2024 - "normalizer": "lowercase"  
2025 - }  
2026 - }  
2027 - },  
2028 - "sv": {  
2029 - "type": "text",  
2030 - "analyzer": "swedish",  
2031 - "fields": {  
2032 - "keyword": {  
2033 - "type": "keyword",  
2034 - "normalizer": "lowercase"  
2035 - }  
2036 - }  
2037 - },  
2038 - "tr": { 1725 + "eu": {
  1726 + "type": "text",
  1727 + "analyzer": "basque",
  1728 + "fields": {
  1729 + "keyword": {
  1730 + "type": "keyword",
  1731 + "normalizer": "lowercase"
  1732 + }
  1733 + }
  1734 + },
  1735 + "pt_br": {
  1736 + "type": "text",
  1737 + "analyzer": "brazilian",
  1738 + "fields": {
  1739 + "keyword": {
  1740 + "type": "keyword",
  1741 + "normalizer": "lowercase"
  1742 + }
  1743 + }
  1744 + },
  1745 + "bg": {
  1746 + "type": "text",
  1747 + "analyzer": "bulgarian",
  1748 + "fields": {
  1749 + "keyword": {
  1750 + "type": "keyword",
  1751 + "normalizer": "lowercase"
  1752 + }
  1753 + }
  1754 + },
  1755 + "ca": {
  1756 + "type": "text",
  1757 + "analyzer": "catalan",
  1758 + "fields": {
  1759 + "keyword": {
  1760 + "type": "keyword",
  1761 + "normalizer": "lowercase"
  1762 + }
  1763 + }
  1764 + },
  1765 + "cjk": {
  1766 + "type": "text",
  1767 + "analyzer": "cjk",
  1768 + "fields": {
  1769 + "keyword": {
  1770 + "type": "keyword",
  1771 + "normalizer": "lowercase"
  1772 + }
  1773 + }
  1774 + },
  1775 + "cs": {
  1776 + "type": "text",
  1777 + "analyzer": "czech",
  1778 + "fields": {
  1779 + "keyword": {
  1780 + "type": "keyword",
  1781 + "normalizer": "lowercase"
  1782 + }
  1783 + }
  1784 + },
  1785 + "da": {
  1786 + "type": "text",
  1787 + "analyzer": "danish",
  1788 + "fields": {
  1789 + "keyword": {
  1790 + "type": "keyword",
  1791 + "normalizer": "lowercase"
  1792 + }
  1793 + }
  1794 + },
  1795 + "nl": {
  1796 + "type": "text",
  1797 + "analyzer": "dutch",
  1798 + "fields": {
  1799 + "keyword": {
  1800 + "type": "keyword",
  1801 + "normalizer": "lowercase"
  1802 + }
  1803 + }
  1804 + },
  1805 + "fi": {
  1806 + "type": "text",
  1807 + "analyzer": "finnish",
  1808 + "fields": {
  1809 + "keyword": {
  1810 + "type": "keyword",
  1811 + "normalizer": "lowercase"
  1812 + }
  1813 + }
  1814 + },
  1815 + "fr": {
  1816 + "type": "text",
  1817 + "analyzer": "french",
  1818 + "fields": {
  1819 + "keyword": {
  1820 + "type": "keyword",
  1821 + "normalizer": "lowercase"
  1822 + }
  1823 + }
  1824 + },
  1825 + "gl": {
  1826 + "type": "text",
  1827 + "analyzer": "galician",
  1828 + "fields": {
  1829 + "keyword": {
  1830 + "type": "keyword",
  1831 + "normalizer": "lowercase"
  1832 + }
  1833 + }
  1834 + },
  1835 + "de": {
  1836 + "type": "text",
  1837 + "analyzer": "german",
  1838 + "fields": {
  1839 + "keyword": {
  1840 + "type": "keyword",
  1841 + "normalizer": "lowercase"
  1842 + }
  1843 + }
  1844 + },
  1845 + "el": {
  1846 + "type": "text",
  1847 + "analyzer": "greek",
  1848 + "fields": {
  1849 + "keyword": {
  1850 + "type": "keyword",
  1851 + "normalizer": "lowercase"
  1852 + }
  1853 + }
  1854 + },
  1855 + "hi": {
  1856 + "type": "text",
  1857 + "analyzer": "hindi",
  1858 + "fields": {
  1859 + "keyword": {
  1860 + "type": "keyword",
  1861 + "normalizer": "lowercase"
  1862 + }
  1863 + }
  1864 + },
  1865 + "hu": {
  1866 + "type": "text",
  1867 + "analyzer": "hungarian",
  1868 + "fields": {
  1869 + "keyword": {
  1870 + "type": "keyword",
  1871 + "normalizer": "lowercase"
  1872 + }
  1873 + }
  1874 + },
  1875 + "id": {
  1876 + "type": "text",
  1877 + "analyzer": "indonesian",
  1878 + "fields": {
  1879 + "keyword": {
  1880 + "type": "keyword",
  1881 + "normalizer": "lowercase"
  1882 + }
  1883 + }
  1884 + },
  1885 + "it": {
  1886 + "type": "text",
  1887 + "analyzer": "italian",
  1888 + "fields": {
  1889 + "keyword": {
  1890 + "type": "keyword",
  1891 + "normalizer": "lowercase"
  1892 + }
  1893 + }
  1894 + },
  1895 + "no": {
  1896 + "type": "text",
  1897 + "analyzer": "norwegian",
  1898 + "fields": {
  1899 + "keyword": {
  1900 + "type": "keyword",
  1901 + "normalizer": "lowercase"
  1902 + }
  1903 + }
  1904 + },
  1905 + "fa": {
  1906 + "type": "text",
  1907 + "analyzer": "persian",
  1908 + "fields": {
  1909 + "keyword": {
  1910 + "type": "keyword",
  1911 + "normalizer": "lowercase"
  1912 + }
  1913 + }
  1914 + },
  1915 + "pt": {
  1916 + "type": "text",
  1917 + "analyzer": "portuguese",
  1918 + "fields": {
  1919 + "keyword": {
  1920 + "type": "keyword",
  1921 + "normalizer": "lowercase"
  1922 + }
  1923 + }
  1924 + },
  1925 + "ro": {
  1926 + "type": "text",
  1927 + "analyzer": "romanian",
  1928 + "fields": {
  1929 + "keyword": {
  1930 + "type": "keyword",
  1931 + "normalizer": "lowercase"
  1932 + }
  1933 + }
  1934 + },
  1935 + "ru": {
  1936 + "type": "text",
  1937 + "analyzer": "russian",
  1938 + "fields": {
  1939 + "keyword": {
  1940 + "type": "keyword",
  1941 + "normalizer": "lowercase"
  1942 + }
  1943 + }
  1944 + },
  1945 + "es": {
  1946 + "type": "text",
  1947 + "analyzer": "spanish",
  1948 + "fields": {
  1949 + "keyword": {
  1950 + "type": "keyword",
  1951 + "normalizer": "lowercase"
  1952 + }
  1953 + }
  1954 + },
  1955 + "sv": {
  1956 + "type": "text",
  1957 + "analyzer": "swedish",
  1958 + "fields": {
  1959 + "keyword": {
  1960 + "type": "keyword",
  1961 + "normalizer": "lowercase"
  1962 + }
  1963 + }
  1964 + },
  1965 + "tr": {
  1966 + "type": "text",
  1967 + "analyzer": "turkish",
  1968 + "fields": {
  1969 + "keyword": {
  1970 + "type": "keyword",
  1971 + "normalizer": "lowercase"
  1972 + }
  1973 + }
  1974 + },
  1975 + "th": {
  1976 + "type": "text",
  1977 + "analyzer": "thai",
  1978 + "fields": {
  1979 + "keyword": {
  1980 + "type": "keyword",
  1981 + "normalizer": "lowercase"
  1982 + }
  1983 + }
  1984 + }
  1985 + }
  1986 + },
  1987 + "category_id": {
  1988 + "type": "keyword"
  1989 + },
  1990 + "category_name": {
  1991 + "type": "keyword"
  1992 + },
  1993 + "category_level": {
  1994 + "type": "integer"
  1995 + },
  1996 + "category1_name": {
  1997 + "type": "keyword"
  1998 + },
  1999 + "category2_name": {
  2000 + "type": "keyword"
  2001 + },
  2002 + "category3_name": {
  2003 + "type": "keyword"
  2004 + },
  2005 + "specifications": {
  2006 + "type": "nested",
  2007 + "properties": {
  2008 + "sku_id": {
  2009 + "type": "keyword"
  2010 + },
  2011 + "name": {
  2012 + "type": "keyword"
  2013 + },
  2014 + "value_keyword": {
  2015 + "type": "keyword"
  2016 + },
  2017 + "value_text": {
  2018 + "type": "object",
  2019 + "properties": {
  2020 + "zh": {
2039 "type": "text", 2021 "type": "text",
2040 - "analyzer": "turkish", 2022 + "analyzer": "index_ik",
  2023 + "search_analyzer": "query_ik",
2041 "fields": { 2024 "fields": {
2042 "keyword": { 2025 "keyword": {
2043 "type": "keyword", 2026 "type": "keyword",
@@ -2045,9 +2028,9 @@ @@ -2045,9 +2028,9 @@
2045 } 2028 }
2046 } 2029 }
2047 }, 2030 },
2048 - "th": { 2031 + "en": {
2049 "type": "text", 2032 "type": "text",
2050 - "analyzer": "thai", 2033 + "analyzer": "english",
2051 "fields": { 2034 "fields": {
2052 "keyword": { 2035 "keyword": {
2053 "type": "keyword", 2036 "type": "keyword",
@@ -2059,6 +2042,46 @@ @@ -2059,6 +2042,46 @@
2059 } 2042 }
2060 } 2043 }
2061 }, 2044 },
  2045 + "qanchors": {
  2046 + "type": "object",
  2047 + "properties": {
  2048 + "zh": {
  2049 + "type": "text",
  2050 + "analyzer": "index_ik",
  2051 + "search_analyzer": "query_ik"
  2052 + },
  2053 + "en": {
  2054 + "type": "text",
  2055 + "analyzer": "english"
  2056 + }
  2057 + }
  2058 + },
  2059 + "enriched_tags": {
  2060 + "type": "object",
  2061 + "properties": {
  2062 + "zh": {
  2063 + "type": "text",
  2064 + "analyzer": "index_ik",
  2065 + "search_analyzer": "query_ik",
  2066 + "fields": {
  2067 + "keyword": {
  2068 + "type": "keyword",
  2069 + "normalizer": "lowercase"
  2070 + }
  2071 + }
  2072 + },
  2073 + "en": {
  2074 + "type": "text",
  2075 + "analyzer": "english",
  2076 + "fields": {
  2077 + "keyword": {
  2078 + "type": "keyword",
  2079 + "normalizer": "lowercase"
  2080 + }
  2081 + }
  2082 + }
  2083 + }
  2084 + },
2062 "enriched_attributes": { 2085 "enriched_attributes": {
2063 "type": "nested", 2086 "type": "nested",
2064 "properties": { 2087 "properties": {